From 231517429b5387eaf1226105d16a89a29404279f Mon Sep 17 00:00:00 2001
From: 2uropa <ysj1243@daum.net>
Date: Fri, 4 Jan 2019 15:28:03 +0900
Subject: [PATCH 01/29] Modify some windows build options (#285)

* Fix visual studio MULTI-THREADING compile error (#283)

* Add static-library build configuration. (#284)
---
 pthreads/COPKG/pthreads.vcxproj | 45 +++++++++++++++++
 ptools/ptools.vcxproj           | 75 +++++++++++++++++++++++++++
 vmaf.sln                        | 14 +++++
 wrapper/wrapper.vcxproj         | 90 +++++++++++++++++++++++++++++++--
 wrapper/wrapper.vcxproj.filters | 10 +++-
 5 files changed, 230 insertions(+), 4 deletions(-)
diff --git a/pthreads/COPKG/pthreads.vcxproj b/pthreads/COPKG/pthreads.vcxproj
index e6f6a739a..7e5933f81 100644
--- a/pthreads/COPKG/pthreads.vcxproj
+++ b/pthreads/COPKG/pthreads.vcxproj
@@ -1,10 +1,26 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="DebugLib|Win32">
+      <Configuration>DebugLib</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="DebugLib|x64">
+      <Configuration>DebugLib</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLib|Win32">
+      <Configuration>ReleaseLib</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLib|x64">
+      <Configuration>ReleaseLib</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -46,6 +62,7 @@
   -->
   <PropertyGroup Label="Configuration">
     <UseDebugLibraries Condition="'$(Configuration)'=='Debug'">true</UseDebugLibraries>
+    <UseDebugLibraries Condition="'$(Configuration)'=='DebugLib'">true</UseDebugLibraries>
     <UseDebugLibraries Condition="'$(Configuration)'!='Debug'">false</UseDebugLibraries>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -109,6 +126,19 @@
       <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MultiThreadedDebug</RuntimeLibrary>
       <CompileAs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Default</CompileAs>
+      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='DebugLib'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">MultiThreadedDebugDLL</RuntimeLibrary>
+      <CompileAs Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">Default</CompileAs>
+      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -121,6 +151,21 @@
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='ReleaseLib'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">MultiThreadedDLL</RuntimeLibrary>
+      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
diff --git a/ptools/ptools.vcxproj b/ptools/ptools.vcxproj
index 06667f304..046e93b2c 100644
--- a/ptools/ptools.vcxproj
+++ b/ptools/ptools.vcxproj
@@ -1,10 +1,18 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="DebugLib|x64">
+      <Configuration>DebugLib</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLib|x64">
+      <Configuration>ReleaseLib</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
@@ -49,6 +57,12 @@
     <PlatformToolset>v140</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -56,6 +70,13 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -63,20 +84,36 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Linkage-pthreads>static</Linkage-pthreads>
     <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
+    <Linkage-pthreads>static</Linkage-pthreads>
+    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Linkage-pthreads>static</Linkage-pthreads>
     <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
+    <Linkage-pthreads>static</Linkage-pthreads>
+    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>
@@ -87,6 +124,23 @@
       <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -104,6 +158,27 @@
       <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
diff --git a/vmaf.sln b/vmaf.sln
index 2d91e566d..ea83da7ab 100644
--- a/vmaf.sln
+++ b/vmaf.sln
@@ -19,21 +19,35 @@ EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
+		DebugLib|x64 = DebugLib|x64
 		Release|x64 = Release|x64
+		ReleaseLib|x64 = ReleaseLib|x64
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Debug|x64.ActiveCfg = Debug|x64
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Debug|x64.Build.0 = Debug|x64
+		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.DebugLib|x64.ActiveCfg = DebugLib|x64
+		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.DebugLib|x64.Build.0 = DebugLib|x64
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Release|x64.ActiveCfg = Release|x64
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Release|x64.Build.0 = Release|x64
+		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64
+		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.ReleaseLib|x64.Build.0 = ReleaseLib|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Debug|x64.ActiveCfg = Debug|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Debug|x64.Build.0 = Debug|x64
+		{3F07B371-1B81-477E-886C-0E079B0A6803}.DebugLib|x64.ActiveCfg = DebugLib|x64
+		{3F07B371-1B81-477E-886C-0E079B0A6803}.DebugLib|x64.Build.0 = DebugLib|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Release|x64.ActiveCfg = Release|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Release|x64.Build.0 = Release|x64
+		{3F07B371-1B81-477E-886C-0E079B0A6803}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64
+		{3F07B371-1B81-477E-886C-0E079B0A6803}.ReleaseLib|x64.Build.0 = ReleaseLib|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.ActiveCfg = Debug|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.Build.0 = Debug|x64
+		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.DebugLib|x64.ActiveCfg = DebugLib|x64
+		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.DebugLib|x64.Build.0 = DebugLib|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.ActiveCfg = Release|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.Build.0 = Release|x64
+		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64
+		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.ReleaseLib|x64.Build.0 = ReleaseLib|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/wrapper/wrapper.vcxproj b/wrapper/wrapper.vcxproj
index e54b48ce7..639953500 100644
--- a/wrapper/wrapper.vcxproj
+++ b/wrapper/wrapper.vcxproj
@@ -1,10 +1,18 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="DebugLib|x64">
+      <Configuration>DebugLib</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLib|x64">
+      <Configuration>ReleaseLib</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
@@ -24,6 +32,12 @@
     <PlatformToolset>v140</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -31,6 +45,13 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -38,28 +59,62 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <Linkage-pthreads>static</Linkage-pthreads>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <Linkage-pthreads>static</Linkage-pthreads>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <Linkage-pthreads>static</Linkage-pthreads>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <Linkage-pthreads>static</Linkage-pthreads>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -76,11 +131,36 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -98,6 +178,7 @@
     <ClInclude Include="..\feature\src\ansnr_tools.h" />
     <ClInclude Include="..\feature\src\common\alignment.h" />
     <ClInclude Include="..\feature\src\common\alloc.h" />
+    <ClInclude Include="..\feature\src\common\blur_array.h" />
     <ClInclude Include="..\feature\src\common\convolution.h" />
     <ClInclude Include="..\feature\src\common\convolution_internal.h" />
     <ClInclude Include="..\feature\src\common\cpu.h" />
@@ -119,6 +200,7 @@
     <ClInclude Include="..\feature\src\vif_options.h" />
     <ClInclude Include="..\feature\src\vif_tools.h" />
     <ClInclude Include="src\combo.h" />
+    <ClInclude Include="src\cpu_info.h" />
     <ClInclude Include="src\darray.h" />
     <ClInclude Include="src\libvmaf.h" />
     <ClInclude Include="src\pugixml\pugiconfig.hpp" />
@@ -134,6 +216,7 @@
     <ClCompile Include="..\feature\src\ansnr_tools.c" />
     <ClCompile Include="..\feature\src\common\alloc.c" />
     <ClCompile Include="..\feature\src\common\alignment.c" />
+    <ClCompile Include="..\feature\src\common\blur_array.c" />
     <ClCompile Include="..\feature\src\common\convolution.c" />
     <ClCompile Include="..\feature\src\common\convolution_avx.c" />
     <ClCompile Include="..\feature\src\common\cpu.c" />
@@ -151,6 +234,7 @@
     <ClCompile Include="..\feature\src\vif.c" />
     <ClCompile Include="..\feature\src\vif_tools.c" />
     <ClCompile Include="src\combo.c" />
+    <ClCompile Include="src\cpu_info.c" />
     <ClCompile Include="src\darray.c" />
     <ClCompile Include="src\libvmaf.cpp" />
     <ClCompile Include="src\main.cpp" />
@@ -164,4 +248,4 @@
     </ProjectReference>
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-</Project>
+</Project>
\ No newline at end of file
diff --git a/wrapper/wrapper.vcxproj.filters b/wrapper/wrapper.vcxproj.filters
index 34fa83ad9..ed47ca6e3 100644
--- a/wrapper/wrapper.vcxproj.filters
+++ b/wrapper/wrapper.vcxproj.filters
@@ -76,6 +76,10 @@
       <Filter>feature</Filter>
     </ClCompile>
     <ClCompile Include="src\libvmaf.cpp" />
+    <ClCompile Include="..\feature\src\common\blur_array.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="src\cpu_info.c" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="src\svm.h" />
@@ -168,6 +172,10 @@
     <ClInclude Include="src\combo.h" />
     <ClInclude Include="src\timer.h" />
     <ClInclude Include="src\libvmaf.h" />
+    <ClInclude Include="..\feature\src\common\blur_array.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpu_info.h" />
   </ItemGroup>
   <ItemGroup>
     <Filter Include="feature">
@@ -183,4 +191,4 @@
       <UniqueIdentifier>{4b256925-f7e1-49e2-9c3b-33c128ce4e37}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-</Project>
+</Project>
\ No newline at end of file

From 76dc78e601dd7983f85cb1699ff383a9cd59570c Mon Sep 17 00:00:00 2001
From: Christos Bampis <christosb@netflix.com>
Date: Mon, 7 Jan 2019 15:09:58 -0800
Subject: [PATCH 02/29] Add matlab imports in run_testing; run mex for ST-MAD.

---
 matlab/STMAD_2011_MatlabCode/Example.m        | 17 ++-----------
 python/script/run_testing.py                  |  1 +
 .../src/vmaf/core/matlab_feature_extractor.py | 25 +++++++++++++++++--
 python/src/vmaf/core/matlab_quality_runner.py |  5 ++++
 4 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/matlab/STMAD_2011_MatlabCode/Example.m b/matlab/STMAD_2011_MatlabCode/Example.m
index b513ef2bc..13670002f 100644
--- a/matlab/STMAD_2011_MatlabCode/Example.m
+++ b/matlab/STMAD_2011_MatlabCode/Example.m
@@ -2,22 +2,9 @@
 clear;
 close all;
 
-% Wid = 176;
-% Hei = 144;
-
-% OrgFile = 'foreman_org_qcif.yuv';
-
-% DstFile = 'foreman_dst_qcif.yuv';
-
-% MadVals = STMAD_index(OrgFile, DstFile, Wid, Hei);
-% MadVals = run_stmad(OrgFile, DstFile, Wid, Hei);
-
 Hei = 324;
 Wid = 576;
-OrgFile = ...
-    '/home/cbampis/Projects/stash/MCE/vmaf_oss/vmaf/python/test/resource/yuv/src01_hrc00_576x324.yuv';
-DstFile = ...
-    '/home/cbampis/Projects/stash/MCE/vmaf_oss/vmaf/python/test/resource/yuv/src01_hrc01_576x324.yuv';
+OrgFile = '../../python/test/resource/yuv/src01_hrc00_576x324.yuv';
+DstFile = '../../python/test/resource/yuv/src01_hrc01_576x324.yuv';
 
 run_stmad(OrgFile, DstFile, Wid, Hei);
-
diff --git a/python/script/run_testing.py b/python/script/run_testing.py
index 885cb5087..7ae2621dc 100755
--- a/python/script/run_testing.py
+++ b/python/script/run_testing.py
@@ -12,6 +12,7 @@
 from vmaf.core.result_store import FileSystemResultStore
 from vmaf.tools.misc import import_python_file, get_cmd_option, cmd_option_exists
 from vmaf.core.quality_runner import QualityRunner, VmafQualityRunner, BootstrapVmafQualityRunner
+from vmaf.core.matlab_quality_runner import STMADQualityRunner, SpEEDMatlabQualityRunner, StrredQualityRunner, StrredOptQualityRunner
 from vmaf.routine import run_test_on_dataset, print_matplotlib_warning
 from vmaf.tools.stats import ListStats
 
diff --git a/python/src/vmaf/core/matlab_feature_extractor.py b/python/src/vmaf/core/matlab_feature_extractor.py
index 1953589bb..480172e60 100644
--- a/python/src/vmaf/core/matlab_feature_extractor.py
+++ b/python/src/vmaf/core/matlab_feature_extractor.py
@@ -154,8 +154,6 @@ def _generate_result(self, asset):
                 log_file_path=log_file_path,
             )
 
-            # print(strred_cmd)
-
             if self.logger:
                 self.logger.info(strredopt_cmd)
 
@@ -296,6 +294,28 @@ class STMADFeatureExtractor(MatlabFeatureExtractor):
 
     MATLAB_WORKSPACE = VmafConfig.root_path('matlab', 'STMAD_2011_MatlabCode')
 
+    # compile necessary functions; need to use mex from within matlab
+    def _custom_init(self):
+        """Compile the ST-MAD MEX sources (ical_std.c, ical_stat.c) via Matlab."""
+
+        def run_stmad_cmd(stmad_cmd):
+            # mex is given bare file names, so run from the Matlab workspace
+            current_dir = os.getcwd()
+            os.chdir(self.MATLAB_WORKSPACE)
+            try:
+                run_process(stmad_cmd, shell=True)
+            finally:
+                # restore the caller's cwd even if the command raises
+                os.chdir(current_dir)
+
+        matlab = VmafExternalConfig.get_and_assert_matlab()
+        for mex_source in ('ical_std.c', 'ical_stat.c'):
+            stmad_mex_cmd = '''{matlab} -nodisplay -nosplash -nodesktop -r "mex {src}; exit;"'''.format(
+                matlab=matlab,
+                src=mex_source,
+            )
+            run_stmad_cmd(stmad_mex_cmd)
+
     @classmethod
     def _assert_an_asset(cls, asset):
         super(STMADFeatureExtractor, cls)._assert_an_asset(asset)
@@ -326,6 +346,7 @@ def _generate_result(self, asset):
             h=quality_height,
             log_file_path=log_file_path,
         )
+
         if self.logger:
             self.logger.info(stmad_cmd)
 
diff --git a/python/src/vmaf/core/matlab_quality_runner.py b/python/src/vmaf/core/matlab_quality_runner.py
index c565779ca..63acec42a 100644
--- a/python/src/vmaf/core/matlab_quality_runner.py
+++ b/python/src/vmaf/core/matlab_quality_runner.py
@@ -1,3 +1,7 @@
+import os
+
+from vmaf.config import VmafExternalConfig, VmafConfig
+from vmaf.tools.misc import run_process
 from vmaf.core.feature_assembler import FeatureAssembler
 from vmaf.core.matlab_feature_extractor import StrredFeatureExtractor, StrredOptFeatureExtractor, SpEEDMatlabFeatureExtractor, STMADFeatureExtractor
 from vmaf.core.quality_runner import QualityRunner
@@ -162,6 +166,7 @@ def _remove_result(self, asset):
 
 
 class STMADQualityRunner(QualityRunner):
+
     TYPE = 'STMAD'
 
     VERSION = 'F' + STMADFeatureExtractor.VERSION + '-1.1'

From b87b5c74471b790c179dd73de2bbb44b8a76ff63 Mon Sep 17 00:00:00 2001
From: Christos Bampis <christosb@netflix.com>
Date: Mon, 7 Jan 2019 15:51:31 -0800
Subject: [PATCH 03/29] Add documentation for Matlab implementations.

---
 README.md                    |  4 ++++
 resource/doc/matlab_usage.md | 40 ++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 resource/doc/matlab_usage.md

diff --git a/README.md b/README.md
index 4e799b715..8e04f861e 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,10 @@ Besides the default VMAF model which predicts the quality of videos displayed on
 
 Since VDK v1.3.7 (June 2018), we have introduced a way to quantify the level of confidence that a VMAF prediction entails. Each VMAF prediction score now can come with a 95% confidence interval (CI), which quantifies the level of confidence that the prediction lies within the interval. Refer to the [VMAF confidence interval](resource/doc/conf_interval.md) page for more details.
 
+## Matlab Functionality
+
+Besides the Python/C/C++ part of the repository, we have also introduced a number of algorithms that are implemented in Matlab. For example, users can calculate ST-RRED, ST-MAD, SpEED-QA, and BRISQUE. For more details, see the [Matlab Usage](resource/doc/matlab_usage.md) page.
+
 ## References
 
 Refer to the [references](resource/doc/references.md) page.
diff --git a/resource/doc/matlab_usage.md b/resource/doc/matlab_usage.md
new file mode 100644
index 000000000..bb8638b77
--- /dev/null
+++ b/resource/doc/matlab_usage.md
@@ -0,0 +1,40 @@
+Matlab Usage
+===================
+
+## Prerequisites
+
+To use the Matlab-based algorithms, you first need to download and activate Matlab, as described on the [MathWorks website](https://www.mathworks.com/). Then, add a line to `python/src/vmaf/externals.py`, such as:
+
+```
+MATLAB_PATH = <path_to_matlab>
+```
+
+For example (macOS):
+
+```
+MATLAB_PATH = "/Applications/MATLAB_R2017a.app/bin/matlab" 
+```
+
+## Available Algorithms
+
+The available algorithms are ST-MAD [1], ST-RRED [2], SpEED-QA [3] and BRISQUE [4].
+
+Example usage for ST-MAD, ST-RRED and SpEED-QA with the run_testing script:
+
+`./run_testing quality_type dataset_file`
+
+where quality_type can be STMAD (for ST-MAD), STRRED (for ST-RRED) and SpEED_Matlab (for SpEED-QA). We have also implemented a computationally efficient version of ST-RRED (use STRREDOpt as the quality_type), which produces numerically identical results to ST-RRED.
+
+Example usage for BRISQUE:
+
+`./run_vmaf yuv_420p 1920 1080 NFLX_dataset_public/ref/OldTownCross_25fps.yuv NFLX_dataset_public/dis/OldTownCross_90_1080_4300.yuv --model model/vmaf_brisque_all_v0.0rc.pkl`
+
+## References
+
+[1] P. V. Vu, C. T. Vu, and D. M. Chandler, "A spatiotemporal most-apparent-distortion model for video quality assessment," IEEE Int’l Conf. Image Process., pp. 2505–2508, 2011.
+
+[2] R. Soundararajan and A. C. Bovik, "Video quality assessment by reduced reference spatio-temporal entropic differencing," IEEE Trans. Circ. Syst. Video Technol., vol. 23, no. 4, pp. 684–694, Apr. 2013.
+
+[3] C. G. Bampis, P. Gupta, R. Soundararajan, and A. C. Bovik, "SpEED-QA: Spatial efficient entropic differencing for image and video quality," IEEE Signal Process. Lett., vol. 24, no. 9, pp. 1333–1337, 2017.
+
+[4] A. Mittal, A. K. Moorthy, and A. C. Bovik, "No-reference image quality assessment in the spatial domain," IEEE Trans. Image Process., vol. 21, no. 12, pp. 4695–4708, Dec. 2012.

From f88761224162f0b6337fc91e5c117fcca4baa9e2 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 10 Jan 2019 13:47:36 -0800
Subject: [PATCH 04/29] Update conf_interval.md

---
 resource/doc/conf_interval.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resource/doc/conf_interval.md b/resource/doc/conf_interval.md
index 03eed94fa..8b0034bed 100644
--- a/resource/doc/conf_interval.md
+++ b/resource/doc/conf_interval.md
@@ -11,7 +11,7 @@ There are two ways to perform bootstrapping on VMAF. The first one is called pla
 
 ### Run in Command Line
 
-To enable CI, use the option `--ci` in the command line tools with a bootstrapping model such as `model/vmaf_rb_v0.6.2/vmaf_rb_v0.6.2.pkl`.
+To enable CI, use the option `--ci` in the command line tools with a bootstrapping model such as `model/vmaf_rb_v0.6.2/vmaf_rb_v0.6.2.pkl`. The `--ci` option is available for both `./run_vmaf` and `./wrapper/vmafossexec`. In [libvmaf](libvmaf.md), CI can be enabled by setting the argument `enable_conf_interval` to 1.
 
 For example, running
 

From 3c169056c20a4d158c7f08fd3d072cc04c29234e Mon Sep 17 00:00:00 2001
From: fishjam <fishjam@163.com>
Date: Sat, 12 Jan 2019 03:05:42 +0800
Subject: [PATCH 05/29] refactor windows solution: (#291)

* refactor windows solution:
1.add libvmaf(static lib), get information from wrapper;
2.remove unused DebugLib and ReleaseLib(since there is libvmaf, does not need build lib from wrapper.vcxproj)
3.remove unused x86 configuration.

* 1.add project for feature tools(ref feature\Makefile) and add some missing files in libvmaf
2.add feature/examples.bat(ref feature\examples) and will copy to output dir while building;
3.add BuildForWindows.md

* Update conf_interval.md
---
 BuildForWindows.md                            |   8 +
 feature/examples.bat                          |  33 +++
 feature/vs2015/moment/moment.vcxproj          |  99 ++++++++
 feature/vs2015/moment/moment.vcxproj.filters  |  22 ++
 feature/vs2015/ms_ssim/ms_ssim.vcxproj        |  99 ++++++++
 .../vs2015/ms_ssim/ms_ssim.vcxproj.filters    |  22 ++
 feature/vs2015/psnr/psnr.vcxproj              |  99 ++++++++
 feature/vs2015/psnr/psnr.vcxproj.filters      |  22 ++
 feature/vs2015/ssim/ssim.vcxproj              |  95 +++++++
 feature/vs2015/ssim/ssim.vcxproj.filters      |  22 ++
 feature/vs2015/vmaf/vmaf.vcxproj              |  95 +++++++
 feature/vs2015/vmaf/vmaf.vcxproj.filters      |  22 ++
 pthreads/COPKG/pthreads.vcxproj               |  51 ----
 ptools/ptools.vcxproj                         |  73 ------
 vmaf.sln                                      |  76 +++++-
 wrapper/libvmaf.vcxproj                       | 160 ++++++++++++
 wrapper/libvmaf.vcxproj.filters               | 240 ++++++++++++++++++
 wrapper/wrapper.vcxproj                       | 158 +-----------
 wrapper/wrapper.vcxproj.filters               | 188 --------------
 19 files changed, 1110 insertions(+), 474 deletions(-)
 create mode 100644 BuildForWindows.md
 create mode 100644 feature/examples.bat
 create mode 100644 feature/vs2015/moment/moment.vcxproj
 create mode 100644 feature/vs2015/moment/moment.vcxproj.filters
 create mode 100644 feature/vs2015/ms_ssim/ms_ssim.vcxproj
 create mode 100644 feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters
 create mode 100644 feature/vs2015/psnr/psnr.vcxproj
 create mode 100644 feature/vs2015/psnr/psnr.vcxproj.filters
 create mode 100644 feature/vs2015/ssim/ssim.vcxproj
 create mode 100644 feature/vs2015/ssim/ssim.vcxproj.filters
 create mode 100644 feature/vs2015/vmaf/vmaf.vcxproj
 create mode 100644 feature/vs2015/vmaf/vmaf.vcxproj.filters
 create mode 100644 wrapper/libvmaf.vcxproj
 create mode 100644 wrapper/libvmaf.vcxproj.filters

diff --git a/BuildForWindows.md b/BuildForWindows.md
new file mode 100644
index 000000000..1200f5269
--- /dev/null
+++ b/BuildForWindows.md
@@ -0,0 +1,8 @@
+# Prepare
+  - Visual Studio 2015 on Windows
+  
+# Steps
+  1. Open [vmaf.sln](vmaf.sln) in Visual Studio 2015.
+  2. Select Build => Batch Build from the menu.
+  3. Select the solution configurations you want (or choose Select All), then click Build.
+  4. After the build, you will find all the build results in $(SolutionDir)/x64/$(Configuration), including an examples.bat that you can run.
diff --git a/feature/examples.bat b/feature/examples.bat
new file mode 100644
index 000000000..fac258ff9
--- /dev/null
+++ b/feature/examples.bat
@@ -0,0 +1,33 @@
+rem Windows counterpart of the feature/examples script; see feature/examples for the original commands.
+
+vmaf
+echo ""
+
+echo "run adm:"
+vmaf adm yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run ansnr:"
+vmaf ansnr yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run motion:"
+vmaf motion yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run vif:"
+vmaf vif yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run all:"
+vmaf all yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run psnr:"
+psnr yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run 2nd moment:"
+moment 2 yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv 576 324
+
+echo "run ssim:"
+ssim yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "run ms_ssim:"
+ms_ssim yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324
+
+echo "done."
diff --git a/feature/vs2015/moment/moment.vcxproj b/feature/vs2015/moment/moment.vcxproj
new file mode 100644
index 000000000..4215cd149
--- /dev/null
+++ b/feature/vs2015/moment/moment.vcxproj
@@ -0,0 +1,99 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\moment_main.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{D67BDBF2-D42F-465D-BABD-A381BFFAA373}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>moment</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/moment/moment.vcxproj.filters b/feature/vs2015/moment/moment.vcxproj.filters
new file mode 100644
index 000000000..4acb18b59
--- /dev/null
+++ b/feature/vs2015/moment/moment.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\moment_main.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/ms_ssim/ms_ssim.vcxproj b/feature/vs2015/ms_ssim/ms_ssim.vcxproj
new file mode 100644
index 000000000..c69acbfc9
--- /dev/null
+++ b/feature/vs2015/ms_ssim/ms_ssim.vcxproj
@@ -0,0 +1,99 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ms_ssim_main.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{CF8FA427-306B-4803-9F23-31C229A630B6}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>ms_ssim</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters b/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters
new file mode 100644
index 000000000..3de31b095
--- /dev/null
+++ b/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ms_ssim_main.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/psnr/psnr.vcxproj b/feature/vs2015/psnr/psnr.vcxproj
new file mode 100644
index 000000000..dcaa53f6d
--- /dev/null
+++ b/feature/vs2015/psnr/psnr.vcxproj
@@ -0,0 +1,99 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\psnr_main.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{2DC3E418-09C6-49F4-A8DA-04C614D6016D}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>psnr</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/psnr/psnr.vcxproj.filters b/feature/vs2015/psnr/psnr.vcxproj.filters
new file mode 100644
index 000000000..479b15852
--- /dev/null
+++ b/feature/vs2015/psnr/psnr.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\psnr_main.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/ssim/ssim.vcxproj b/feature/vs2015/ssim/ssim.vcxproj
new file mode 100644
index 000000000..f51ef96a8
--- /dev/null
+++ b/feature/vs2015/ssim/ssim.vcxproj
@@ -0,0 +1,95 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ssim_main.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>ssim</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/ssim/ssim.vcxproj.filters b/feature/vs2015/ssim/ssim.vcxproj.filters
new file mode 100644
index 000000000..5c5c46ba4
--- /dev/null
+++ b/feature/vs2015/ssim/ssim.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\ssim_main.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/vmaf/vmaf.vcxproj b/feature/vs2015/vmaf/vmaf.vcxproj
new file mode 100644
index 000000000..fcc0f75ac
--- /dev/null
+++ b/feature/vs2015/vmaf/vmaf.vcxproj
@@ -0,0 +1,95 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\vmaf_main.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{054010E9-3004-4C24-B0F3-DCCE36D6B436}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>vmaf</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/feature/vs2015/vmaf/vmaf.vcxproj.filters b/feature/vs2015/vmaf/vmaf.vcxproj.filters
new file mode 100644
index 000000000..6b6a90d60
--- /dev/null
+++ b/feature/vs2015/vmaf/vmaf.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\vmaf_main.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/pthreads/COPKG/pthreads.vcxproj b/pthreads/COPKG/pthreads.vcxproj
index 7e5933f81..4f9cc09c4 100644
--- a/pthreads/COPKG/pthreads.vcxproj
+++ b/pthreads/COPKG/pthreads.vcxproj
@@ -1,30 +1,6 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="DebugLib|Win32">
-      <Configuration>DebugLib</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="DebugLib|x64">
-      <Configuration>DebugLib</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="ReleaseLib|Win32">
-      <Configuration>ReleaseLib</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="ReleaseLib|x64">
-      <Configuration>ReleaseLib</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
@@ -62,7 +38,6 @@
   -->
   <PropertyGroup Label="Configuration">
     <UseDebugLibraries Condition="'$(Configuration)'=='Debug'">true</UseDebugLibraries>
-    <UseDebugLibraries Condition="'$(Configuration)'=='DebugLib'">true</UseDebugLibraries>
     <UseDebugLibraries Condition="'$(Configuration)'!='Debug'">false</UseDebugLibraries>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -132,18 +107,6 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)'=='DebugLib'">
-    <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">MultiThreadedDebugDLL</RuntimeLibrary>
-      <CompileAs Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">Default</CompileAs>
-      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
@@ -158,20 +121,6 @@
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)'=='ReleaseLib'">
-    <ClCompile>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">MultiThreadedDLL</RuntimeLibrary>
-      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-    </Link>
-  </ItemDefinitionGroup>
   <!--
 pthread $(VERSION) :LIBRARY: attr.c barrier.c cancel.c cleanup.c condvar.c \
 	create.c dll.c exit.c fork.c global.c misc.c mutex.c private.c \
diff --git a/ptools/ptools.vcxproj b/ptools/ptools.vcxproj
index 046e93b2c..fe78ce86c 100644
--- a/ptools/ptools.vcxproj
+++ b/ptools/ptools.vcxproj
@@ -1,18 +1,10 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="DebugLib|x64">
-      <Configuration>DebugLib</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
-    <ProjectConfiguration Include="ReleaseLib|x64">
-      <Configuration>ReleaseLib</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
@@ -57,12 +49,6 @@
     <PlatformToolset>v140</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v140</PlatformToolset>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -70,13 +56,6 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v140</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -84,36 +63,20 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Linkage-pthreads>static</Linkage-pthreads>
     <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
-    <Linkage-pthreads>static</Linkage-pthreads>
-    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Linkage-pthreads>static</Linkage-pthreads>
     <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
-    <Linkage-pthreads>static</Linkage-pthreads>
-    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>
@@ -130,22 +93,6 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
-    <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <SubSystem>Windows</SubSystem>
-    </Link>
-  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -166,25 +113,5 @@
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <SubSystem>Windows</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-    </Link>
-  </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
 </Project>
\ No newline at end of file
diff --git a/vmaf.sln b/vmaf.sln
index ea83da7ab..ab12c1272 100644
--- a/vmaf.sln
+++ b/vmaf.sln
@@ -7,6 +7,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmafossexec", "wrapper\wrap
 	ProjectSection(ProjectDependencies) = postProject
 		{3F07B371-1B81-477E-886C-0E079B0A6803} = {3F07B371-1B81-477E-886C-0E079B0A6803}
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
 	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ptools", "ptools\ptools.vcxproj", "{3F07B371-1B81-477E-886C-0E079B0A6803}"
@@ -16,40 +17,87 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ptools", "ptools\ptools.vcx
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pthreads", "pthreads\COPKG\pthreads.vcxproj", "{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}"
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libvmaf", "wrapper\libvmaf.vcxproj", "{5F5103F4-E473-4476-8E7B-FD3465E872B1}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moment", "feature\vs2015\moment\moment.vcxproj", "{D67BDBF2-D42F-465D-BABD-A381BFFAA373}"
+	ProjectSection(ProjectDependencies) = postProject
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ms_ssim", "feature\vs2015\ms_ssim\ms_ssim.vcxproj", "{CF8FA427-306B-4803-9F23-31C229A630B6}"
+	ProjectSection(ProjectDependencies) = postProject
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "psnr", "feature\vs2015\psnr\psnr.vcxproj", "{2DC3E418-09C6-49F4-A8DA-04C614D6016D}"
+	ProjectSection(ProjectDependencies) = postProject
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ssim", "feature\vs2015\ssim\ssim.vcxproj", "{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}"
+	ProjectSection(ProjectDependencies) = postProject
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmaf", "feature\vs2015\vmaf\vmaf.vcxproj", "{054010E9-3004-4C24-B0F3-DCCE36D6B436}"
+	ProjectSection(ProjectDependencies) = postProject
+		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
-		DebugLib|x64 = DebugLib|x64
 		Release|x64 = Release|x64
-		ReleaseLib|x64 = ReleaseLib|x64
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Debug|x64.ActiveCfg = Debug|x64
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Debug|x64.Build.0 = Debug|x64
-		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.DebugLib|x64.ActiveCfg = DebugLib|x64
-		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.DebugLib|x64.Build.0 = DebugLib|x64
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Release|x64.ActiveCfg = Release|x64
 		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Release|x64.Build.0 = Release|x64
-		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64
-		{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.ReleaseLib|x64.Build.0 = ReleaseLib|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Debug|x64.ActiveCfg = Debug|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Debug|x64.Build.0 = Debug|x64
-		{3F07B371-1B81-477E-886C-0E079B0A6803}.DebugLib|x64.ActiveCfg = DebugLib|x64
-		{3F07B371-1B81-477E-886C-0E079B0A6803}.DebugLib|x64.Build.0 = DebugLib|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Release|x64.ActiveCfg = Release|x64
 		{3F07B371-1B81-477E-886C-0E079B0A6803}.Release|x64.Build.0 = Release|x64
-		{3F07B371-1B81-477E-886C-0E079B0A6803}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64
-		{3F07B371-1B81-477E-886C-0E079B0A6803}.ReleaseLib|x64.Build.0 = ReleaseLib|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.ActiveCfg = Debug|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.Build.0 = Debug|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.DebugLib|x64.ActiveCfg = DebugLib|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.DebugLib|x64.Build.0 = DebugLib|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.ActiveCfg = Release|x64
 		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.Build.0 = Release|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.ReleaseLib|x64.Build.0 = ReleaseLib|x64
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1}.Debug|x64.ActiveCfg = Debug|x64
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1}.Debug|x64.Build.0 = Debug|x64
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1}.Release|x64.ActiveCfg = Release|x64
+		{5F5103F4-E473-4476-8E7B-FD3465E872B1}.Release|x64.Build.0 = Release|x64
+		{D67BDBF2-D42F-465D-BABD-A381BFFAA373}.Debug|x64.ActiveCfg = Debug|x64
+		{D67BDBF2-D42F-465D-BABD-A381BFFAA373}.Debug|x64.Build.0 = Debug|x64
+		{D67BDBF2-D42F-465D-BABD-A381BFFAA373}.Release|x64.ActiveCfg = Release|x64
+		{D67BDBF2-D42F-465D-BABD-A381BFFAA373}.Release|x64.Build.0 = Release|x64
+		{CF8FA427-306B-4803-9F23-31C229A630B6}.Debug|x64.ActiveCfg = Debug|x64
+		{CF8FA427-306B-4803-9F23-31C229A630B6}.Debug|x64.Build.0 = Debug|x64
+		{CF8FA427-306B-4803-9F23-31C229A630B6}.Release|x64.ActiveCfg = Release|x64
+		{CF8FA427-306B-4803-9F23-31C229A630B6}.Release|x64.Build.0 = Release|x64
+		{2DC3E418-09C6-49F4-A8DA-04C614D6016D}.Debug|x64.ActiveCfg = Debug|x64
+		{2DC3E418-09C6-49F4-A8DA-04C614D6016D}.Debug|x64.Build.0 = Debug|x64
+		{2DC3E418-09C6-49F4-A8DA-04C614D6016D}.Release|x64.ActiveCfg = Release|x64
+		{2DC3E418-09C6-49F4-A8DA-04C614D6016D}.Release|x64.Build.0 = Release|x64
+		{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}.Debug|x64.ActiveCfg = Debug|x64
+		{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}.Debug|x64.Build.0 = Debug|x64
+		{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}.Release|x64.ActiveCfg = Release|x64
+		{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}.Release|x64.Build.0 = Release|x64
+		{054010E9-3004-4C24-B0F3-DCCE36D6B436}.Debug|x64.ActiveCfg = Debug|x64
+		{054010E9-3004-4C24-B0F3-DCCE36D6B436}.Debug|x64.Build.0 = Debug|x64
+		{054010E9-3004-4C24-B0F3-DCCE36D6B436}.Release|x64.ActiveCfg = Release|x64
+		{054010E9-3004-4C24-B0F3-DCCE36D6B436}.Release|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
+	GlobalSection(NestedProjects) = preSolution
+		{D67BDBF2-D42F-465D-BABD-A381BFFAA373} = {3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}
+		{CF8FA427-306B-4803-9F23-31C229A630B6} = {3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}
+		{2DC3E418-09C6-49F4-A8DA-04C614D6016D} = {3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}
+		{418D8FFD-D23A-4C56-8A94-D4B9D39083D1} = {3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}
+		{054010E9-3004-4C24-B0F3-DCCE36D6B436} = {3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}
+	EndGlobalSection
 EndGlobal
diff --git a/wrapper/libvmaf.vcxproj b/wrapper/libvmaf.vcxproj
new file mode 100644
index 000000000..40500db15
--- /dev/null
+++ b/wrapper/libvmaf.vcxproj
@@ -0,0 +1,160 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{5F5103F4-E473-4476-8E7B-FD3465E872B1}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>libvmaf</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup />
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <Text Include="ReadMe.txt" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\feature\src\adm.c" />
+    <ClCompile Include="..\feature\src\adm_tools.c" />
+    <ClCompile Include="..\feature\src\all.c" />
+    <ClCompile Include="..\feature\src\ansnr.c" />
+    <ClCompile Include="..\feature\src\ansnr_tools.c" />
+    <ClCompile Include="..\feature\src\common\alignment.c" />
+    <ClCompile Include="..\feature\src\common\alloc.c" />
+    <ClCompile Include="..\feature\src\common\blur_array.c" />
+    <ClCompile Include="..\feature\src\common\convolution.c" />
+    <ClCompile Include="..\feature\src\common\convolution_avx.c" />
+    <ClCompile Include="..\feature\src\common\cpu.c" />
+    <ClCompile Include="..\feature\src\common\file_io.c" />
+    <ClCompile Include="..\feature\src\common\frame.c" />
+    <ClCompile Include="..\feature\src\iqa\convolve.c" />
+    <ClCompile Include="..\feature\src\iqa\decimate.c" />
+    <ClCompile Include="..\feature\src\iqa\math_utils.c" />
+    <ClCompile Include="..\feature\src\iqa\ssim_tools.c" />
+    <ClCompile Include="..\feature\src\moment.c" />
+    <ClCompile Include="..\feature\src\motion.c" />
+    <ClCompile Include="..\feature\src\ms_ssim.c" />
+    <ClCompile Include="..\feature\src\psnr.c" />
+    <ClCompile Include="..\feature\src\psnr_tools.c" />
+    <ClCompile Include="..\feature\src\ssim.c" />
+    <ClCompile Include="..\feature\src\vif.c" />
+    <ClCompile Include="..\feature\src\vif_tools.c" />
+    <ClCompile Include="src\combo.c" />
+    <ClCompile Include="src\cpu_info.c" />
+    <ClCompile Include="src\darray.c" />
+    <ClCompile Include="src\libvmaf.cpp" />
+    <ClCompile Include="src\pugixml\pugixml.cpp" />
+    <ClCompile Include="src\svm.cpp" />
+    <ClCompile Include="src\vmaf.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\feature\src\adm_options.h" />
+    <ClInclude Include="..\feature\src\adm_tools.h" />
+    <ClInclude Include="..\feature\src\ansnr_options.h" />
+    <ClInclude Include="..\feature\src\ansnr_tools.h" />
+    <ClInclude Include="..\feature\src\common\alignment.h" />
+    <ClInclude Include="..\feature\src\common\alloc.h" />
+    <ClInclude Include="..\feature\src\common\blur_array.h" />
+    <ClInclude Include="..\feature\src\common\convolution.h" />
+    <ClInclude Include="..\feature\src\common\convolution_internal.h" />
+    <ClInclude Include="..\feature\src\common\cpu.h" />
+    <ClInclude Include="..\feature\src\common\cpudetect.h" />
+    <ClInclude Include="..\feature\src\common\file_io.h" />
+    <ClInclude Include="..\feature\src\common\frame.h" />
+    <ClInclude Include="..\feature\src\common\macros.h" />
+    <ClInclude Include="..\feature\src\iqa\convolve.h" />
+    <ClInclude Include="..\feature\src\iqa\decimate.h" />
+    <ClInclude Include="..\feature\src\iqa\iqa.h" />
+    <ClInclude Include="..\feature\src\iqa\iqa_options.h" />
+    <ClInclude Include="..\feature\src\iqa\iqa_os.h" />
+    <ClInclude Include="..\feature\src\iqa\math_utils.h" />
+    <ClInclude Include="..\feature\src\iqa\ssim_tools.h" />
+    <ClInclude Include="..\feature\src\motion_options.h" />
+    <ClInclude Include="..\feature\src\motion_tools.h" />
+    <ClInclude Include="..\feature\src\psnr_options.h" />
+    <ClInclude Include="..\feature\src\psnr_tools.h" />
+    <ClInclude Include="..\feature\src\vif_options.h" />
+    <ClInclude Include="..\feature\src\vif_tools.h" />
+    <ClInclude Include="src\combo.h" />
+    <ClInclude Include="src\cpu_info.h" />
+    <ClInclude Include="src\darray.h" />
+    <ClInclude Include="src\libvmaf.h" />
+    <ClInclude Include="src\pugixml\pugiconfig.hpp" />
+    <ClInclude Include="src\pugixml\pugixml.hpp" />
+    <ClInclude Include="src\svm.h" />
+    <ClInclude Include="src\timer.h" />
+    <ClInclude Include="src\vmaf.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/wrapper/libvmaf.vcxproj.filters b/wrapper/libvmaf.vcxproj.filters
new file mode 100644
index 000000000..d128d8e5f
--- /dev/null
+++ b/wrapper/libvmaf.vcxproj.filters
@@ -0,0 +1,240 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="feature">
+      <UniqueIdentifier>{dee9a2f0-7c52-449e-9d11-a683f66eb2c9}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="feature\common">
+      <UniqueIdentifier>{589b3bda-313a-403c-953e-708015877f7b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="feature\iqa">
+      <UniqueIdentifier>{5613a206-8935-4871-ade5-e078de0d0fa6}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="pugixml">
+      <UniqueIdentifier>{25a2d365-ffa6-4464-9712-344c46384a31}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <Text Include="ReadMe.txt" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\feature\src\all.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\adm.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\adm_tools.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\ansnr.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\ansnr_tools.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\blur_array.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\moment.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\motion.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\ms_ssim.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\psnr.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\psnr_tools.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\ssim.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\vif.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\vif_tools.c">
+      <Filter>feature</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\alloc.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\alignment.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\convolution.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\convolution_avx.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\cpu.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\file_io.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\common\frame.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\iqa\convolve.c">
+      <Filter>feature\iqa</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\iqa\decimate.c">
+      <Filter>feature\iqa</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\iqa\math_utils.c">
+      <Filter>feature\iqa</Filter>
+    </ClCompile>
+    <ClCompile Include="..\feature\src\iqa\ssim_tools.c">
+      <Filter>feature\iqa</Filter>
+    </ClCompile>
+    <ClCompile Include="src\pugixml\pugixml.cpp">
+      <Filter>pugixml</Filter>
+    </ClCompile>
+    <ClCompile Include="src\combo.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="src\cpu_info.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="src\darray.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="src\libvmaf.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="src\svm.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="src\vmaf.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\feature\src\adm_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\adm_tools.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\ansnr_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\ansnr_tools.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\blur_array.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\psnr_tools.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\motion_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\motion_tools.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\psnr_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\vif_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\vif_tools.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\alignment.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\alloc.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\convolution.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\convolution_internal.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\cpu.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\cpudetect.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\file_io.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\macros.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\common\frame.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\convolve.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\decimate.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\iqa.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\iqa_options.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\iqa_os.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\math_utils.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\iqa\ssim_tools.h">
+      <Filter>feature\iqa</Filter>
+    </ClInclude>
+    <ClInclude Include="src\pugixml\pugiconfig.hpp">
+      <Filter>pugixml</Filter>
+    </ClInclude>
+    <ClInclude Include="src\pugixml\pugixml.hpp">
+      <Filter>pugixml</Filter>
+    </ClInclude>
+    <ClInclude Include="src\combo.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpu_info.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="src\darray.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="src\libvmaf.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="src\svm.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="src\timer.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="src\vmaf.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/wrapper/wrapper.vcxproj b/wrapper/wrapper.vcxproj
index 639953500..4f3828896 100644
--- a/wrapper/wrapper.vcxproj
+++ b/wrapper/wrapper.vcxproj
@@ -1,18 +1,10 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="DebugLib|x64">
-      <Configuration>DebugLib</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
-    <ProjectConfiguration Include="ReleaseLib|x64">
-      <Configuration>ReleaseLib</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
@@ -32,12 +24,6 @@
     <PlatformToolset>v140</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v140</PlatformToolset>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -45,13 +31,6 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v140</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -59,32 +38,18 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <Linkage-pthreads>static</Linkage-pthreads>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
-    <LinkIncremental>true</LinkIncremental>
-    <Linkage-pthreads>static</Linkage-pthreads>
-  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <Linkage-pthreads>static</Linkage-pthreads>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
-    <LinkIncremental>false</LinkIncremental>
-    <Linkage-pthreads>static</Linkage-pthreads>
-  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>
@@ -100,28 +65,13 @@
     <Link>
       <SubSystem>Console</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugLib|x64'">
-    <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
+    <PostBuildEvent>
+      <Message>copy examples.bat to output folder</Message>
+      <Command>copy /y "$(SolutionDir)feature\examples.bat" "$(OutDir)"</Command>
+    </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
@@ -143,104 +93,16 @@
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
       <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
+    <PostBuildEvent>
+      <Message>copy examples.bat to output folder</Message>
+      <Command>copy /y "$(SolutionDir)feature\examples.bat" "$(OutDir)"</Command>
+    </PostBuildEvent>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLib|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
-      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClInclude Include="..\feature\src\adm_options.h" />
-    <ClInclude Include="..\feature\src\adm_tools.h" />
-    <ClInclude Include="..\feature\src\ansnr_options.h" />
-    <ClInclude Include="..\feature\src\ansnr_tools.h" />
-    <ClInclude Include="..\feature\src\common\alignment.h" />
-    <ClInclude Include="..\feature\src\common\alloc.h" />
-    <ClInclude Include="..\feature\src\common\blur_array.h" />
-    <ClInclude Include="..\feature\src\common\convolution.h" />
-    <ClInclude Include="..\feature\src\common\convolution_internal.h" />
-    <ClInclude Include="..\feature\src\common\cpu.h" />
-    <ClInclude Include="..\feature\src\common\cpudetect.h" />
-    <ClInclude Include="..\feature\src\common\file_io.h" />
-    <ClInclude Include="..\feature\src\common\macros.h" />
-    <ClInclude Include="..\feature\src\common\frame.h" />
-    <ClInclude Include="..\feature\src\psnr_tools.h" />
-    <ClInclude Include="..\feature\src\iqa\convolve.h" />
-    <ClInclude Include="..\feature\src\iqa\decimate.h" />
-    <ClInclude Include="..\feature\src\iqa\iqa.h" />
-    <ClInclude Include="..\feature\src\iqa\iqa_options.h" />
-    <ClInclude Include="..\feature\src\iqa\iqa_os.h" />
-    <ClInclude Include="..\feature\src\iqa\math_utils.h" />
-    <ClInclude Include="..\feature\src\iqa\ssim_tools.h" />
-    <ClInclude Include="..\feature\src\motion_options.h" />
-    <ClInclude Include="..\feature\src\motion_tools.h" />
-    <ClInclude Include="..\feature\src\psnr_options.h" />
-    <ClInclude Include="..\feature\src\vif_options.h" />
-    <ClInclude Include="..\feature\src\vif_tools.h" />
-    <ClInclude Include="src\combo.h" />
-    <ClInclude Include="src\cpu_info.h" />
-    <ClInclude Include="src\darray.h" />
-    <ClInclude Include="src\libvmaf.h" />
-    <ClInclude Include="src\pugixml\pugiconfig.hpp" />
-    <ClInclude Include="src\pugixml\pugixml.hpp" />
-    <ClInclude Include="src\svm.h" />
-    <ClInclude Include="src\timer.h" />
-    <ClInclude Include="src\vmaf.h" />
-  </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="..\feature\src\adm.c" />
-    <ClCompile Include="..\feature\src\adm_tools.c" />
-    <ClCompile Include="..\feature\src\ansnr.c" />
-    <ClCompile Include="..\feature\src\ansnr_tools.c" />
-    <ClCompile Include="..\feature\src\common\alloc.c" />
-    <ClCompile Include="..\feature\src\common\alignment.c" />
-    <ClCompile Include="..\feature\src\common\blur_array.c" />
-    <ClCompile Include="..\feature\src\common\convolution.c" />
-    <ClCompile Include="..\feature\src\common\convolution_avx.c" />
-    <ClCompile Include="..\feature\src\common\cpu.c" />
-    <ClCompile Include="..\feature\src\common\file_io.c" />
-    <ClCompile Include="..\feature\src\common\frame.c" />
-    <ClCompile Include="..\feature\src\iqa\convolve.c" />
-    <ClCompile Include="..\feature\src\iqa\decimate.c" />
-    <ClCompile Include="..\feature\src\iqa\math_utils.c" />
-    <ClCompile Include="..\feature\src\iqa\ssim_tools.c" />
-    <ClCompile Include="..\feature\src\motion.c" />
-    <ClCompile Include="..\feature\src\ms_ssim.c" />
-    <ClCompile Include="..\feature\src\psnr.c" />
-    <ClCompile Include="..\feature\src\psnr_tools.c" />
-    <ClCompile Include="..\feature\src\ssim.c" />
-    <ClCompile Include="..\feature\src\vif.c" />
-    <ClCompile Include="..\feature\src\vif_tools.c" />
-    <ClCompile Include="src\combo.c" />
-    <ClCompile Include="src\cpu_info.c" />
-    <ClCompile Include="src\darray.c" />
-    <ClCompile Include="src\libvmaf.cpp" />
     <ClCompile Include="src\main.cpp" />
-    <ClCompile Include="src\pugixml\pugixml.cpp" />
-    <ClCompile Include="src\svm.cpp" />
-    <ClCompile Include="src\vmaf.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\ptools\ptools.vcxproj">
diff --git a/wrapper/wrapper.vcxproj.filters b/wrapper/wrapper.vcxproj.filters
index ed47ca6e3..2dc63c25f 100644
--- a/wrapper/wrapper.vcxproj.filters
+++ b/wrapper/wrapper.vcxproj.filters
@@ -2,193 +2,5 @@
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup>
     <ClCompile Include="src\main.cpp" />
-    <ClCompile Include="src\svm.cpp" />
-    <ClCompile Include="src\vmaf.cpp" />
-    <ClCompile Include="..\feature\src\adm.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\adm_tools.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\iqa\convolve.c">
-      <Filter>feature\iqa</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\iqa\decimate.c">
-      <Filter>feature\iqa</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\alloc.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\alignment.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\convolution.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\cpu.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\file_io.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\frame.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\vif_tools.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\ansnr.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\ansnr_tools.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\common\convolution_avx.c">
-      <Filter>feature\common</Filter>
-    </ClCompile>
-    <ClCompile Include="src\pugixml\pugixml.cpp">
-      <Filter>pugixml</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\iqa\math_utils.c">
-      <Filter>feature\iqa</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\motion.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\psnr.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\psnr_tools.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\iqa\ssim_tools.c">
-      <Filter>feature\iqa</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\vif.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="src\darray.c" />
-    <ClCompile Include="src\combo.c" />
-    <ClCompile Include="..\feature\src\ssim.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="..\feature\src\ms_ssim.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="src\libvmaf.cpp" />
-    <ClCompile Include="..\feature\src\common\blur_array.c">
-      <Filter>feature</Filter>
-    </ClCompile>
-    <ClCompile Include="src\cpu_info.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="src\svm.h" />
-    <ClInclude Include="src\vmaf.h" />
-    <ClInclude Include="..\feature\src\adm_options.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\adm_tools.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\iqa.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\iqa_options.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\iqa_os.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\convolve.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\decimate.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\alignment.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\alloc.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\convolution.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\cpu.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\cpudetect.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\file_io.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\macros.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\frame.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\motion_options.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\motion_tools.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\psnr_options.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\psnr_tools.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\vif_options.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\vif_tools.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\ansnr_options.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\ansnr_tools.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="src\pugixml\pugixml.hpp">
-      <Filter>pugixml</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\common\convolution_internal.h">
-      <Filter>feature\common</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\math_utils.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="src\pugixml\pugiconfig.hpp">
-      <Filter>pugixml</Filter>
-    </ClInclude>
-    <ClInclude Include="..\feature\src\iqa\ssim_tools.h">
-      <Filter>feature\iqa</Filter>
-    </ClInclude>
-    <ClInclude Include="src\darray.h" />
-    <ClInclude Include="src\combo.h" />
-    <ClInclude Include="src\timer.h" />
-    <ClInclude Include="src\libvmaf.h" />
-    <ClInclude Include="..\feature\src\common\blur_array.h">
-      <Filter>feature</Filter>
-    </ClInclude>
-    <ClInclude Include="src\cpu_info.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <Filter Include="feature">
-      <UniqueIdentifier>{573f84c9-4101-4008-a1ac-236fef485369}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="feature\common">
-      <UniqueIdentifier>{66cba303-e12b-4aab-a136-6d6c4e08bcc9}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="feature\iqa">
-      <UniqueIdentifier>{bc8c4849-570a-497b-8480-689c250aa363}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="pugixml">
-      <UniqueIdentifier>{4b256925-f7e1-49e2-9c3b-33c128ce4e37}</UniqueIdentifier>
-    </Filter>
   </ItemGroup>
 </Project>
\ No newline at end of file

From fb8bea169c363802c95ac5b822b0e9e9e8e5e8bd Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Fri, 11 Jan 2019 11:13:14 -0800
Subject: [PATCH 06/29] Move BuildForWindows page and add a link to README.md.

---
 README.md                                             | 1 +
 BuildForWindows.md => resource/doc/BuildForWindows.md | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 rename BuildForWindows.md => resource/doc/BuildForWindows.md (83%)

diff --git a/README.md b/README.md
index 8e04f861e..be7bc1eea 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ There are a number of ways one can use the package:
   - [`vmafossexec` - a C++ "wrapper" executable](resource/doc/vmafossexec.md) offers running the prediction part of the algorithm in full, such that one can easily deploy VMAF in a production environment without needing to configure the Python dependencies. Additionally, `vmafossexec` offers a number of exclusive features, such as 1) speed optimization using multi-threading and skipping frames, 2) optionally computing PSNR, SSIM and MS-SSIM metrics in the output.
   - [`libvmaf.a` - a static library](resource/doc/libvmaf.md) offers an interface to incorporate VMAF into your C/C++ code. Using this library, VMAF is now included as a filter in [FFmpeg](http://ffmpeg.org/) main branch, and can be configured using: `./configure --enable-libvmaf --enable-version3`. See [this](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for details. Using FFmpeg with `libvmaf` allows passing in compressed video bitstreams directly to VMAF.
   - [VMAF Dockerfile](Dockerfile) generates a VMAF docker image from the [VMAF Python library](resource/doc/VMAF_Python_library.md). Refer to [this](resource/doc/docker.md) document for detailed usages.
+  - Build VMAF on Windows: follow instructions on [this](resource/doc/BuildForWindows.md) page.
 
 ## Datasets
 
diff --git a/BuildForWindows.md b/resource/doc/BuildForWindows.md
similarity index 83%
rename from BuildForWindows.md
rename to resource/doc/BuildForWindows.md
index 1200f5269..7b84c3656 100644
--- a/BuildForWindows.md
+++ b/resource/doc/BuildForWindows.md
@@ -2,7 +2,7 @@
   - Visual Studio 2015 on Windows
   
 # Steps
-  - 1.open [vmaf.sln](vmaf.sln) in Visual Studio 2015 
+  - 1.open [vmaf.sln](../../vmaf.sln) in Visual Studio 2015 
   - 2.Select Build => Batch Build from menu
   - 3.Select the Solution Config as your want or Select All, then Build.
   - 4.After Build, you will find all the build result in $(SolutionDir)/x64/$(Configuration), and there is examples.bat that you can run.

From 1a512ffe805e78060342746d0a743c6fc0be6403 Mon Sep 17 00:00:00 2001
From: Christos Bampis <christosb@netflix.com>
Date: Tue, 22 Jan 2019 18:30:09 -0800
Subject: [PATCH 07/29] Print out individual vmaf bootstrap scores in text file
 from vmafossexec.

---
 wrapper/src/vmaf.cpp | 65 ++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index d1369b98a..8ffda6d69 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -900,6 +900,9 @@ void BootstrapVmafQualityRunner::_postproc_transform_clip(
     scoreStdDev *= slope;
 }
 
+static const char BOOTSTRAP_MODEL_NAME_TRAILING_ZEROS[] = "%04d";
+static const int BOOTSTRAP_MODEL_NAME_BUF_SIZE = 100;
+
 void BootstrapVmafQualityRunner::_set_prediction_result(
         std::vector<VmafPredictionStruct> predictionStructs,
         Result& result) {
@@ -922,13 +925,13 @@ void BootstrapVmafQualityRunner::_set_prediction_result(
     size_t num_models = predictionStructs.at(0).vmafMultiModelPrediction.size();
     std::vector<double> perModelScore;
     // character array to put the name of the vmaf bootstrap model, e.g. vmaf_0001 is the first one
-    char char_buffer[50];
+    char char_buffer[BOOTSTRAP_MODEL_NAME_BUF_SIZE];
 
     for (size_t j = 0; j < num_models; j++) {
         for (size_t i = 0; i < predictionStructs.size(); i++) {
             perModelScore.push_back(predictionStructs.at(i).vmafMultiModelPrediction.at(j));
         }
-        sprintf(char_buffer, "%04d", j + 1);
+        sprintf(char_buffer, BOOTSTRAP_MODEL_NAME_TRAILING_ZEROS, j + 1);
         result.set_scores(BOOSTRAP_VMAF_MODEL_PREFIX + std::string(char_buffer), perModelScore);
         perModelScore.clear();
     }
@@ -1000,30 +1003,6 @@ double RunVmaf(const char* fmt, int width, int height,
 
     std::vector<std::string> result_keys = result.get_keys();
 
-    int num_bootstrap_models = 0;
-    std::string bootstrap_model_list_str = "";
-
-    // determine number of bootstrap models (if any) and construct a comma-separated string of bootstrap vmaf model names
-    for (size_t j=0; j<result_keys.size(); j++)
-    {
-        if (result_keys[j].find(BOOSTRAP_VMAF_MODEL_PREFIX)!= std::string::npos)
-        {
-            if (num_bootstrap_models == 0)
-            {
-                bootstrap_model_list_str += result_keys[j] + ",";
-            }
-            else if (num_bootstrap_models == 1)
-            {
-                bootstrap_model_list_str += result_keys[j];
-            }
-            else
-            {
-                bootstrap_model_list_str += "," + result_keys[j];
-            }
-            num_bootstrap_models += 1;
-        }
-    }
-
     double aggregate_psnr = 0.0, aggregate_ssim = 0.0, aggregate_ms_ssim = 0.0;
     if (result.has_scores("psnr"))
         aggregate_psnr = result.get_score("psnr");
@@ -1053,6 +1032,40 @@ double RunVmaf(const char* fmt, int width, int height,
             printf("MS-SSIM score = %f\n", aggregate_ms_ssim);
     }
 
+    int num_bootstrap_models = 0;
+    std::string bootstrap_model_list_str = "";
+
+    // character array to put the name of the vmaf bootstrap model, e.g. 0001 is the first one
+    char char_buffer[BOOTSTRAP_MODEL_NAME_BUF_SIZE];
+
+    // determine number of bootstrap models (if any) and construct a comma-separated string of bootstrap vmaf model names
+    for (size_t j=0; j<result_keys.size(); j++)
+    {
+        if (result_keys[j].find(BOOSTRAP_VMAF_MODEL_PREFIX)!= std::string::npos)
+        {
+            if (num_bootstrap_models == 0)
+            {
+                bootstrap_model_list_str += result_keys[j] + ",";
+            }
+            else if (num_bootstrap_models == 1)
+            {
+                bootstrap_model_list_str += result_keys[j];
+            }
+            else
+            {
+                bootstrap_model_list_str += "," + result_keys[j];
+            }
+            sprintf(char_buffer, BOOTSTRAP_MODEL_NAME_TRAILING_ZEROS, num_bootstrap_models + 1);
+            if (pool_method) {
+                printf("VMAF score (%s), model %s = %f\n", pool_method, char_buffer, result.get_score(result_keys[j]));
+            }
+            else {
+                printf("VMAF score, model %s = %f\n", char_buffer, result.get_score(result_keys[j]));
+            }
+            num_bootstrap_models += 1;
+        }
+    }
+
     if (log_path != NULL && log_fmt !=NULL && (strcmp(log_fmt, "json")==0))
     {
         /* output to json */

From e5f0870094e26242752ac93e2e54d2b5e9468eb6 Mon Sep 17 00:00:00 2001
From: Christos Bampis <christosb@netflix.com>
Date: Wed, 23 Jan 2019 16:00:56 -0800
Subject: [PATCH 08/29] Use std::string in bootstrap model names.

---
 wrapper/src/vmaf.cpp | 24 ++++++++++++------------
 wrapper/src/vmaf.h   |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index 8ffda6d69..8f50c47b4 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -900,8 +900,14 @@ void BootstrapVmafQualityRunner::_postproc_transform_clip(
     scoreStdDev *= slope;
 }
 
-static const char BOOTSTRAP_MODEL_NAME_TRAILING_ZEROS[] = "%04d";
-static const int BOOTSTRAP_MODEL_NAME_BUF_SIZE = 100;
+static const int BOOTSTRAP_MODEL_NAME_PRECISION = 4;
+
+std::string to_zero_lead(const int value, const unsigned precision)
+{
+     std::ostringstream oss;
+     oss << std::setw(precision) << std::setfill('0') << value;
+     return oss.str();
+}
 
 void BootstrapVmafQualityRunner::_set_prediction_result(
         std::vector<VmafPredictionStruct> predictionStructs,
@@ -924,15 +930,13 @@ void BootstrapVmafQualityRunner::_set_prediction_result(
     // num_models is same across frames, so just use first frame length
     size_t num_models = predictionStructs.at(0).vmafMultiModelPrediction.size();
     std::vector<double> perModelScore;
-    // character array to put the name of the vmaf bootstrap model, e.g. vmaf_0001 is the first one
-    char char_buffer[BOOTSTRAP_MODEL_NAME_BUF_SIZE];
+    // name of the vmaf bootstrap model, e.g. vmaf_0001 is the first one
 
     for (size_t j = 0; j < num_models; j++) {
         for (size_t i = 0; i < predictionStructs.size(); i++) {
             perModelScore.push_back(predictionStructs.at(i).vmafMultiModelPrediction.at(j));
         }
-        sprintf(char_buffer, BOOTSTRAP_MODEL_NAME_TRAILING_ZEROS, j + 1);
-        result.set_scores(BOOSTRAP_VMAF_MODEL_PREFIX + std::string(char_buffer), perModelScore);
+        result.set_scores(BOOSTRAP_VMAF_MODEL_PREFIX + to_zero_lead(j + 1, BOOTSTRAP_MODEL_NAME_PRECISION), perModelScore);
         perModelScore.clear();
     }
 
@@ -1035,9 +1039,6 @@ double RunVmaf(const char* fmt, int width, int height,
     int num_bootstrap_models = 0;
     std::string bootstrap_model_list_str = "";
 
-    // character array to put the name of the vmaf bootstrap model, e.g. 0001 is the first one
-    char char_buffer[BOOTSTRAP_MODEL_NAME_BUF_SIZE];
-
     // determine number of bootstrap models (if any) and construct a comma-separated string of bootstrap vmaf model names
     for (size_t j=0; j<result_keys.size(); j++)
     {
@@ -1055,12 +1056,11 @@ double RunVmaf(const char* fmt, int width, int height,
             {
                 bootstrap_model_list_str += "," + result_keys[j];
             }
-            sprintf(char_buffer, BOOTSTRAP_MODEL_NAME_TRAILING_ZEROS, num_bootstrap_models + 1);
             if (pool_method) {
-                printf("VMAF score (%s), model %s = %f\n", pool_method, char_buffer, result.get_score(result_keys[j]));
+                printf("VMAF score (%s), model %s = %f\n", pool_method, to_zero_lead(num_bootstrap_models + 1, BOOTSTRAP_MODEL_NAME_PRECISION).c_str(), result.get_score(result_keys[j]));
             }
             else {
-                printf("VMAF score, model %s = %f\n", char_buffer, result.get_score(result_keys[j]));
+                printf("VMAF score, model %s = %f\n", to_zero_lead(num_bootstrap_models + 1, BOOTSTRAP_MODEL_NAME_PRECISION).c_str(), result.get_score(result_keys[j]));
             }
             num_bootstrap_models += 1;
         }
diff --git a/wrapper/src/vmaf.h b/wrapper/src/vmaf.h
index 54c720310..0a1a934d6 100644
--- a/wrapper/src/vmaf.h
+++ b/wrapper/src/vmaf.h
@@ -36,7 +36,7 @@
 #include "chooseser.h"
 #include "darray.h"
 
-static const char BOOSTRAP_VMAF_MODEL_PREFIX[] = "vmaf_";
+static const std::string BOOSTRAP_VMAF_MODEL_PREFIX = "vmaf_";
 
 double RunVmaf(const char* fmt, int width, int height,
                int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data),

From 2deaf4cc5d9c100cc5734327cb09c6d666ff4b94 Mon Sep 17 00:00:00 2001
From: Holy Wu <HolyWu@users.noreply.github.com>
Date: Sat, 26 Jan 2019 16:04:49 +0800
Subject: [PATCH 09/29] Fix incorrect VMAF score on VS2015 and refactor MSVC
 project files

1. Fix incorrect VMAF score on VS2015 caused by the instruction set of convolution_avx.c not being set to /arch:AVX.
2. Refactor MSVC project files: remove redundant settings and clean up.
---
 .../{vs2015/moment => msvc}/moment.vcxproj    |  44 ++----
 .../moment => msvc}/moment.vcxproj.filters    |   2 +-
 .../{vs2015/ms_ssim => msvc}/ms_ssim.vcxproj  |  44 ++----
 .../ms_ssim => msvc}/ms_ssim.vcxproj.filters  |   2 +-
 feature/{vs2015/psnr => msvc}/psnr.vcxproj    |  44 ++----
 .../psnr => msvc}/psnr.vcxproj.filters        |   2 +-
 feature/{vs2015/ssim => msvc}/ssim.vcxproj    |  40 ++----
 .../ssim => msvc}/ssim.vcxproj.filters        |   2 +-
 feature/{vs2015/vmaf => msvc}/vmaf.vcxproj    |  42 ++----
 .../vmaf => msvc}/vmaf.vcxproj.filters        |   2 +-
 pthreads/COPKG/.vs/pthreads/v14/.suo          | Bin 18944 -> 0 bytes
 pthreads/COPKG/pthreads.sln                   |  28 ----
 pthreads/COPKG/pthreads.vcxproj               | 130 ++++++------------
 pthreads/COPKG/pthreads.vcxproj.filters       |  34 +++--
 pthreads/COPKG/pthreads.vcxproj.user          |   4 -
 ptools/ptools.vcxproj                         |  86 +++++-------
 ptools/ptools.vcxproj.filters                 |  69 ++++++++++
 vmaf.sln                                      |  36 +----
 wrapper/libvmaf.vcxproj                       |  33 ++---
 wrapper/libvmaf.vcxproj.filters               |  24 ++--
 .../{wrapper.vcxproj => vmafossexec.vcxproj}  |  52 +++----
 wrapper/vmafossexec.vcxproj.filters           |  22 +++
 wrapper/wrapper.vcxproj.filters               |   6 -
 23 files changed, 316 insertions(+), 432 deletions(-)
 rename feature/{vs2015/moment => msvc}/moment.vcxproj (61%)
 rename feature/{vs2015/moment => msvc}/moment.vcxproj.filters (94%)
 rename feature/{vs2015/ms_ssim => msvc}/ms_ssim.vcxproj (61%)
 rename feature/{vs2015/ms_ssim => msvc}/ms_ssim.vcxproj.filters (94%)
 rename feature/{vs2015/psnr => msvc}/psnr.vcxproj (61%)
 rename feature/{vs2015/psnr => msvc}/psnr.vcxproj.filters (94%)
 rename feature/{vs2015/ssim => msvc}/ssim.vcxproj (68%)
 rename feature/{vs2015/ssim => msvc}/ssim.vcxproj.filters (94%)
 rename feature/{vs2015/vmaf => msvc}/vmaf.vcxproj (62%)
 rename feature/{vs2015/vmaf => msvc}/vmaf.vcxproj.filters (94%)
 delete mode 100644 pthreads/COPKG/.vs/pthreads/v14/.suo
 delete mode 100644 pthreads/COPKG/pthreads.sln
 delete mode 100644 pthreads/COPKG/pthreads.vcxproj.user
 create mode 100644 ptools/ptools.vcxproj.filters
 rename wrapper/{wrapper.vcxproj => vmafossexec.vcxproj} (61%)
 create mode 100644 wrapper/vmafossexec.vcxproj.filters
 delete mode 100644 wrapper/wrapper.vcxproj.filters

diff --git a/feature/vs2015/moment/moment.vcxproj b/feature/msvc/moment.vcxproj
similarity index 61%
rename from feature/vs2015/moment/moment.vcxproj
rename to feature/msvc/moment.vcxproj
index 4215cd149..38bd75f22 100644
--- a/feature/vs2015/moment/moment.vcxproj
+++ b/feature/msvc/moment.vcxproj
@@ -10,10 +10,8 @@
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\moment_main.c" />
-  </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{D67BDBF2-D42F-465D-BABD-A381BFFAA373}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>moment</RootNamespace>
@@ -45,54 +43,40 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <SDLCheck>true</SDLCheck>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <SDLCheck>true</SDLCheck>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\moment_main.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\wrapper\libvmaf.vcxproj">
+      <Project>{5f5103f4-e473-4476-8e7b-fd3465e872b1}</Project>
+    </ProjectReference>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/feature/vs2015/moment/moment.vcxproj.filters b/feature/msvc/moment.vcxproj.filters
similarity index 94%
rename from feature/vs2015/moment/moment.vcxproj.filters
rename to feature/msvc/moment.vcxproj.filters
index 4acb18b59..c712dd0c8 100644
--- a/feature/vs2015/moment/moment.vcxproj.filters
+++ b/feature/msvc/moment.vcxproj.filters
@@ -15,7 +15,7 @@
     </Filter>
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\moment_main.c">
+    <ClCompile Include="..\src\moment_main.c">
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
diff --git a/feature/vs2015/ms_ssim/ms_ssim.vcxproj b/feature/msvc/ms_ssim.vcxproj
similarity index 61%
rename from feature/vs2015/ms_ssim/ms_ssim.vcxproj
rename to feature/msvc/ms_ssim.vcxproj
index c69acbfc9..08296a224 100644
--- a/feature/vs2015/ms_ssim/ms_ssim.vcxproj
+++ b/feature/msvc/ms_ssim.vcxproj
@@ -10,10 +10,8 @@
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\ms_ssim_main.c" />
-  </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{CF8FA427-306B-4803-9F23-31C229A630B6}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>ms_ssim</RootNamespace>
@@ -45,54 +43,40 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\ms_ssim_main.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\wrapper\libvmaf.vcxproj">
+      <Project>{5f5103f4-e473-4476-8e7b-fd3465e872b1}</Project>
+    </ProjectReference>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters b/feature/msvc/ms_ssim.vcxproj.filters
similarity index 94%
rename from feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters
rename to feature/msvc/ms_ssim.vcxproj.filters
index 3de31b095..16426f5a2 100644
--- a/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters
+++ b/feature/msvc/ms_ssim.vcxproj.filters
@@ -15,7 +15,7 @@
     </Filter>
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\ms_ssim_main.c">
+    <ClCompile Include="..\src\ms_ssim_main.c">
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
diff --git a/feature/vs2015/psnr/psnr.vcxproj b/feature/msvc/psnr.vcxproj
similarity index 61%
rename from feature/vs2015/psnr/psnr.vcxproj
rename to feature/msvc/psnr.vcxproj
index dcaa53f6d..6e1dc2dc3 100644
--- a/feature/vs2015/psnr/psnr.vcxproj
+++ b/feature/msvc/psnr.vcxproj
@@ -10,10 +10,8 @@
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\psnr_main.c" />
-  </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{2DC3E418-09C6-49F4-A8DA-04C614D6016D}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>psnr</RootNamespace>
@@ -45,54 +43,40 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <SDLCheck>true</SDLCheck>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <SDLCheck>true</SDLCheck>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\psnr_main.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\wrapper\libvmaf.vcxproj">
+      <Project>{5f5103f4-e473-4476-8e7b-fd3465e872b1}</Project>
+    </ProjectReference>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/feature/vs2015/psnr/psnr.vcxproj.filters b/feature/msvc/psnr.vcxproj.filters
similarity index 94%
rename from feature/vs2015/psnr/psnr.vcxproj.filters
rename to feature/msvc/psnr.vcxproj.filters
index 479b15852..10e602e30 100644
--- a/feature/vs2015/psnr/psnr.vcxproj.filters
+++ b/feature/msvc/psnr.vcxproj.filters
@@ -15,7 +15,7 @@
     </Filter>
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\psnr_main.c">
+    <ClCompile Include="..\src\psnr_main.c">
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
diff --git a/feature/vs2015/ssim/ssim.vcxproj b/feature/msvc/ssim.vcxproj
similarity index 68%
rename from feature/vs2015/ssim/ssim.vcxproj
rename to feature/msvc/ssim.vcxproj
index f51ef96a8..947fa17fa 100644
--- a/feature/vs2015/ssim/ssim.vcxproj
+++ b/feature/msvc/ssim.vcxproj
@@ -10,10 +10,8 @@
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\ssim_main.c" />
-  </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>ssim</RootNamespace>
@@ -45,50 +43,40 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libvmaf.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
   </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\ssim_main.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\wrapper\libvmaf.vcxproj">
+      <Project>{5f5103f4-e473-4476-8e7b-fd3465e872b1}</Project>
+    </ProjectReference>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/feature/vs2015/ssim/ssim.vcxproj.filters b/feature/msvc/ssim.vcxproj.filters
similarity index 94%
rename from feature/vs2015/ssim/ssim.vcxproj.filters
rename to feature/msvc/ssim.vcxproj.filters
index 5c5c46ba4..287b5cae3 100644
--- a/feature/vs2015/ssim/ssim.vcxproj.filters
+++ b/feature/msvc/ssim.vcxproj.filters
@@ -15,7 +15,7 @@
     </Filter>
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\ssim_main.c">
+    <ClCompile Include="..\src\ssim_main.c">
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
diff --git a/feature/vs2015/vmaf/vmaf.vcxproj b/feature/msvc/vmaf.vcxproj
similarity index 62%
rename from feature/vs2015/vmaf/vmaf.vcxproj
rename to feature/msvc/vmaf.vcxproj
index fcc0f75ac..5e2e281e4 100644
--- a/feature/vs2015/vmaf/vmaf.vcxproj
+++ b/feature/msvc/vmaf.vcxproj
@@ -10,10 +10,8 @@
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\vmaf_main.c" />
-  </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{054010E9-3004-4C24-B0F3-DCCE36D6B436}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>vmaf</RootNamespace>
@@ -45,50 +43,40 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\vmaf_main.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\wrapper\libvmaf.vcxproj">
+      <Project>{5f5103f4-e473-4476-8e7b-fd3465e872b1}</Project>
+    </ProjectReference>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/feature/vs2015/vmaf/vmaf.vcxproj.filters b/feature/msvc/vmaf.vcxproj.filters
similarity index 94%
rename from feature/vs2015/vmaf/vmaf.vcxproj.filters
rename to feature/msvc/vmaf.vcxproj.filters
index 6b6a90d60..2ebdec7f5 100644
--- a/feature/vs2015/vmaf/vmaf.vcxproj.filters
+++ b/feature/msvc/vmaf.vcxproj.filters
@@ -15,7 +15,7 @@
     </Filter>
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\vmaf_main.c">
+    <ClCompile Include="..\src\vmaf_main.c">
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
diff --git a/pthreads/COPKG/.vs/pthreads/v14/.suo b/pthreads/COPKG/.vs/pthreads/v14/.suo
deleted file mode 100644
index 64de26a89ae83672fc7a2a7ce94d19e583652071..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 18944
zcmeHOUyK_^8DA$U%_XI2nh;7F5;sX{ptok%yZ(1=LVZ485}IqS_2nN)xUSdhJLi1%
zVf)gACMX3VDg>w$q#`5)lprBg74@a4yip!do)Q)D!V?ifJWvVs1$dx2e!tzFyFJ_I
zTl+5YrMg*tyR);i^L_Kp_sx7W^UaOxoB#Urx3=Gu>cRnOt#oUqNBWp2J%IQc@&14$
ztwWq?w`OK$n7kH%=xVr&IPjcQ##NJoQd%nES&=TXDSd}+(mk_ne$OI*Js@xX#uNX3
zFK|EeecH)942)&zoOBr2^N6LT3yYL8|MNCTq?g_EzczoXA{pXq2Z5h>r=%h%s7Mwd
z16~*a%gwjYW3RL(&@TQo3fn@(K{zT+14~gVp#Fq059R@+RGe%J-iiO?_)5Jb9Sdgk
ze3(Lf(0$*D=ZD@3xbfXcXS4^;U2fc)Z4=VS!_9yP0b2lD0iOVD1AG$j5a3gQ?SO{?
zdjTwi{lz1Q?*TB*K4v{2=)N=0YPd5TxRd(-%7;g0_5Vs2^?w?2ww(I^8I&hl3(-Zx
zO45Hpx9A6`kJL-*KjYMY_Unv$Lnr#b0p*A?sQ)4^^#5K@n)tT*yuBP?yEX&11^Z;S
zKih31;64Dy8jj!1+TU9a`+vTB`~MwCquNu}IHm*vlmp5dWsc*+qX6pDV}N~t&jLON
zcpT6RkO3jU=K%^p1%v?_KnFwsQ9uk32P6P}fF}S?0`>zA0KBw{_<)-}i047TA;4ib
z?P)xZxbbK3JPH^FJPUXZkO7PUjscDXP5@2<P656II1M-hAg(XtISP0lU;wfJ6OaQ~
zfIMIfFb*gH&H*L>f==)AE1vY4C+=5j27oaO8)OU@?Vf(y{xN`W3bqg}lA2USZ5YcT
z<ptpI2HmxPf=DDz@3)2Gv=Jy<4E6XOOjTfYEU_f$n39I@M*GM~b^dhjQQ&7;lvBU>
zX{W4nen+XSyM>ZF3-`J858!vd_}Sl`S?l0aA+0*Nu0opoA+0j*VZ;^0R9pjyjY85K
zE4k4s?d!n)Wq&T}p8;o!whdaad7QHEdGLA+sTJD>cD^|D*57+kf9g8zK)>}LLf-e?
z`X7b#okRPM0hfcDHn$^n$88O+$2k9eH|o#2kf(m}e-U}#bc;!$_68)p3au>w^9v1B
zxEU9CbYlNge?@V}P*=`63}}}H5G`$?N1)~{7Eu4T=O14Dj(&0eVS3v7FsQ29GBCaf
z75h4nKh}vd!;k@9)2;P+U?dkR=;7<-GKAjO;S!}}+LjRhf1m^WoKp$;Z#DnooRH&7
ztAO^m<&Lt)&?>L^##TKmUY6nMV0P~97FW+_p@ZyiiWudl(Eg78H<!gw2lej<k@fbs
zBkv<<`)NpCZFW7NB29Yavaxnb2l&16-#Gqo)^B3$wR~tFZJ74kKZ$B^3}UFq-+cUa
z@;19e{{nmQf%9v>^7j<-5*X_7Q|3+7eX(|T{-&+ACa?uBoa6h&{{`e-X#5|9w9(R_
zTyp#&Ev*9Zx2=C%mw1&1144cM=kQ;I<YjFwaB?=kv;MSkX)pV&e=GSLLVYKJmFsX`
ztE;*8(I#%c{#>6KajUZi&pueGeXvg1=HtL@*`rYv76^5Z+Q>e+{(H!_PigloCGh{z
zksT+0@y&N$O8o8W585p&-YMw^-+uT{h4HT)+o1gT_$jkY+kBoN`E1O8?kV*wh5mNy
zyWIKx*`Fg=FC2npWI#sdOCs#=d2I7TAIDxX{tik>tobUifzv2oh0JMqQ-M#|zuIE<
zZ`<|{P0#l5FK_*+a{|9t$A`e*62O^>g<uu2FDya(a^MGT@kzuR_#fDf@^%4yimbh7
z!?`>|x9vZNilyCIZxNPOQfGe)+<bOc!qWa)3Vx17&4A;Xm{+f^|IEzX4eLK%Iq(}7
zl4p<Ovp4^vI{Oj-xdpblXL@K*m?CqDZN$>4442_L4&BCYGmniWn0OyYhi9LeK4{fK
zBjs|j+8aD>RjP$@X}=cIaj9BZ?G2`;i?!*BwZCLd*D6M_H#j_<Ef&nDt&1b&39Ga}
zt7!R1K9*P2TtqQ6;~tj3(=PUSCVio1l~`oRPRLlbTA?&nWtLq}o+y-Z<rhTSfr-)4
ze#4x=_rZcy%&q<Yn#{#&&6*6Q%Eh8()=)|{bkHhUm4X=>DA)-`<>K?<)2H2xnOX&Z
z_Xa1cX1P)<WalcRhZJ5uY7>hY5i=516Pj)*@x<BQ(+-D*L7FjU4H+feC#_N~bnI~G
zSk<Zs`g?;5XPhIX9nNZMHm}7aQ6r~CbxVt!J<AsBv5gAeRvFa5AhP9NSBlrC%9B$@
z#j317WK_p9g|Axh8(F{2m6fBFT%lwXXY=4ksYlpK>lj%eCuMizXkn~Wu2?CfYDs~>
zM*e)egHYS0;Gz7~hoz*|m*wZ3>$g_`DSIa9tU_{4mNsV))@K2$L9{)C*p<0}b=fF(
zX3k(vz;#2e6;%Ow%oKz6I%*YjcLRR}F^{hyo<qJe-c;E6A=vk4kSm2d?OPN1n8U)m
zB)!~q8cgXtXeKS+={hZw;4AsV+4oA|O%<yW)fGaEvsOXAT)E@zM46|IRPi*sBrEka
zP*ZdEZc=w-rI8-4-IA8?b-jH|^girqoK};hZ+D%d0@gX$i;_RDcAXl>|7-$W;u>JL
zG+eZg&?>Gx7FP&|&O>@AMHNs~z`8Ko?T1}&H&VqN6YiH5pl!|=dgt|xdnQHT&x4-1
zUQxVvqX%eCk(1h;K50Hhjg}9Dse>SF;|_kce+l5O1N<$cr?4Y9EIkeH81j&T|1Txn
zkDmz)os<PTd4u%RKVDt?s}HUndi{+JTi@T*b3hbCr+}NaTd${ndt>vP+ohjf+cfhB
z@y*7#^c#H%IMG0{0qKGI9h5bVNe_8<w!D^g%_+s^!gK96M?LBg%vjTQYIa;lSnme<
z=&i&Lhv;zPz2w=q)-mrtjJF*!(%y|9CLjOq?+?B8mtSAs^2-yOj!5@k+g|)%&z_^N
z%zW#=*WbOGW}!!<e{6m9z2EHFbMWoI-@N=-c9KGi)EmG1;N;7@-$}pG_h<7*|GD6L
zm1d-Pe(Srp-rafR>Q8cSzP9sUiRCDj80!P)O+)B`i*^sp*^Xgb8Pga^g)axa&Nv08
z7aGZa5scEX@~&eJ6obzX_c{hKH%h`HkdYckIz54s@B~UC&L&9Ob0_YRD9EeeyM`T;
z%a~2&;IlFxud{3K#Y%6z`FR9w&b~YIuDAW}mXo$8Y;U{eEHMOMI}5z#4rLYhOAv7S
zPlYh@^drsLx$j4g9BMqrb?!LdUOJq891ShD7%lWfTwYq{_OSi(gI<ugj|Ts|aI}GP
zJ_u=!p(WFhbNawBt|2CT;HVA!mbA5j`k>faLT%ucLT<m>!2OIQI!D?}=OINYNDuuj
zsRQiGo68v6tued?#omJCfb{jiK%au^MMX>MsaRrA9#oTYS&yd@ax$Du$x2ey(<x<8
zNoeWIm!@jt70bv~L+8y4Q<d_$OQ-tIh%|Z9z!GakyobgwU3zgqNvp|7Dk}GDk%X+L
zbyZIEM-^GqqLD;6oC+ty;maI_*q<{Lo>~bMt(W@c%!tDntlh?<R$P@WOE+Y!Oe?Zs
zg)Lc848=?+`F!5gF3)9GB8r~RCs5B^RCTk*61tmRRTU+oda_&5uxUjNMb7G4Vm5nj
zq3l^p&&3oyEN2ZZ&+K72t}7(Q1lmMG37g*1qe?^zhmEkT#tci=wY)B8qe@<mMWc~O
z)Y7A7Jmw|F%*ND&YRHj9EGz43J|ZXLx+W`{p_}<|ERxGvp6uaxT#KsVxSTh`rbCRO
zYshXB6AS0mxN&)4CA4RcE2E+D9jyem<;Pyd4XB@@)ZN6lkQ2G&2=m<iAt?ZxPZm;+
z4++F+>2WK{8D{5lulU%>>+Fp>4|~y9@&w-mW?!^CCNM5?4VK23YOl4n_F&{Xb2t5e
z?sor~&RcF}{Ml^ppJ}W6|99W{54Kyop}zNUW(kX*dlk~V1OAUZ&+{OFxJ7!aXBi*&
zxfw=mOMU_~xdM8w8fGcu_9`1Kd7g}&@9orBD*f-->C<lWi@$`t!vFL*FpSteXwmle
zyZ{R5?MPqr#e7dm4gK#qyW$##U;LBEE7ole+w0P0^xiyu%yTH#?7Y#>8GugwUyC06
zb90B)^FLrq+u?3M|I=;%ziyxZXuH9i{6M`Ip~>glo+NPd*)B5f!w<u19D|pwcTUW6
z05#h?o*uuWt}WSEI?i7NXu<9P_{Gn0N|fZ_VUJ7C*CJ-}!qZ^>3Y9pQBF=`1|8f2;
zum>-XbclZdeLqJU?wxQK#p{ndjBh*z)#?Nc>%SGad8W~C{dx9>>jn(1@E-$qa%R9@
z*)p6T>h3&$!?QG8`1Fh4%m2mBpRs*f1)hK4c?wRxZU=sjPv;?h_Hu6%LU{g=ctvQE
zr6I{%|3hfs94@wg2J%;e1oGq)Q!3#9Vu9m7&r@`YpR1Opt$$pm=T2U3_M}^@b{BM!
n{{lCkBE8i!g^xUg$?#F}|9=#9@mqhYp0L0Coj>^K`v3nQXyyId

diff --git a/pthreads/COPKG/pthreads.sln b/pthreads/COPKG/pthreads.sln
deleted file mode 100644
index 0d0483282..000000000
--- a/pthreads/COPKG/pthreads.sln
+++ /dev/null
@@ -1,28 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 14
-VisualStudioVersion = 14.0.25420.1
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pthreads", "pthreads.vcxproj", "{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.ActiveCfg = Debug|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.Build.0 = Debug|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x86.ActiveCfg = Debug|Win32
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x86.Build.0 = Debug|Win32
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.ActiveCfg = Release|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.Build.0 = Release|x64
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x86.ActiveCfg = Release|Win32
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x86.Build.0 = Release|Win32
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/pthreads/COPKG/pthreads.vcxproj b/pthreads/COPKG/pthreads.vcxproj
index 4f9cc09c4..236855386 100644
--- a/pthreads/COPKG/pthreads.vcxproj
+++ b/pthreads/COPKG/pthreads.vcxproj
@@ -11,148 +11,98 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}</ProjectGuid>
-    <Keyword>pthreads</Keyword>
-    <RootNamespace>pthread</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>pthreads</RootNamespace>
     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Label="ConfigurationSettings">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v140</PlatformToolset>
-    <CallingConvention>Cdecl</CallingConvention>
-    <LibName>libpthread</LibName>
-  </PropertyGroup>
-  <Target Name="Info" BeforeTargets="Build">
-    <Message Text="  Configuration     :  '$(Configuration)'" />
-    <Message Text="  PlatformToolset   :  '$(PlatformToolset)'" />
-    <Message Text="  Platform          :  '$(Platform)'" />
-    <Message Text="  ConfigurationType :  '$(ConfigurationType)'" />
-    <Message Text="  CallingConvention :  '$(CallingConvention)'" />
-  </Target>
-  <!--
-  <PropertyGroup Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  -->
-  <PropertyGroup Label="Configuration">
-    <UseDebugLibraries Condition="'$(Configuration)'=='Debug'">true</UseDebugLibraries>
-    <UseDebugLibraries Condition="'$(Configuration)'!='Debug'">false</UseDebugLibraries>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
-  <!--
-  <PropertyGroup Condition="'$(Configuration)'=='Release'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <PlatformToolset>v110</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <PlatformToolset>v110</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <PlatformToolset>v110</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
-  -->
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
-  <ImportGroup Label="PropertySheets">
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup />
-  <PropertyGroup>
-    <OutNameSuffix Condition="'$(CallingConvention)|$(ConfigurationType)'=='Cdecl|DynamicLibrary'">
-    </OutNameSuffix>
-    <OutNameSuffix Condition="'$(CallingConvention)|$(ConfigurationType)'=='Cdecl|StaticLibrary'">-static</OutNameSuffix>
-    <OutNameSuffix Condition="'$(CallingConvention)|$(ConfigurationType)'=='stdcall|DynamicLibrary'">-stdcall</OutNameSuffix>
-    <OutNameSuffix Condition="'$(CallingConvention)|$(ConfigurationType)'=='stdcall|StaticLibrary'">-static-stdcall</OutNameSuffix>
-    <OutName>$(LibName)$(OutNameSuffix)</OutName>
-    <ProjectRootDir Condition="'$(SolutionDir)' != ''">$(SolutionDir)</ProjectRootDir>
-    <ProjectRootDir Condition="'$(SolutionDir)' == ''">$(MSBuildThisFileDirectory)..\</ProjectRootDir>
-    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <OutBinDir>$(OutDir)bin\</OutBinDir>
-    <OutLibDir>$(OutDir)lib\</OutLibDir>
-    <TargetName>$(OutName)</TargetName>
-  </PropertyGroup>
-  <ItemDefinitionGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <AdditionalIncludeDirectories>..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <WarningLevel>Level3</WarningLevel>
-      <PreprocessorDefinitions>WIN32;_LIB;PTW32_BUILD;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <PreprocessorDefinitions Condition="'$(ConfigurationType)'=='StaticLibrary'">PTW32_STATIC_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>PTW32_BUILD_INLINED;PTW32_STATIC_LIB;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MultiThreadedDebug</RuntimeLibrary>
-      <CompileAs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Default</CompileAs>
-      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</MultiProcessorCompilation>
-    </ClCompile>
-    <Link>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
-    <ClCompile>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MultiThreaded</RuntimeLibrary>
-      <MultiProcessorCompilation Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</MultiProcessorCompilation>
+      <AdditionalIncludeDirectories>..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WarningLevel>Level3</WarningLevel>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>PTW32_BUILD_INLINED;PTW32_STATIC_LIB;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
+      <SubSystem>Windows</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
-  <!--
-pthread $(VERSION) :LIBRARY: attr.c barrier.c cancel.c cleanup.c condvar.c \
-	create.c dll.c exit.c fork.c global.c misc.c mutex.c private.c \
-	rwlock.c sched.c semaphore.c spin.c sync.c tsd.c nonportable.c
--->
   <ItemGroup>
-    <ClInclude Include="..\pthread.h" />
-    <ClInclude Include="..\implement.h" />
     <ClCompile Include="..\attr.c" />
+    <ClCompile Include="..\autostatic.c" />
     <ClCompile Include="..\barrier.c" />
     <ClCompile Include="..\cancel.c" />
     <ClCompile Include="..\cleanup.c" />
     <ClCompile Include="..\condvar.c" />
     <ClCompile Include="..\create.c" />
     <ClCompile Include="..\dll.c" />
+    <ClCompile Include="..\errno.c" />
     <ClCompile Include="..\exit.c" />
     <ClCompile Include="..\fork.c" />
     <ClCompile Include="..\global.c" />
     <ClCompile Include="..\misc.c" />
     <ClCompile Include="..\mutex.c" />
+    <ClCompile Include="..\nonportable.c" />
     <ClCompile Include="..\private.c" />
     <ClCompile Include="..\rwlock.c" />
     <ClCompile Include="..\sched.c" />
     <ClCompile Include="..\semaphore.c" />
+    <ClCompile Include="..\signal.c" />
     <ClCompile Include="..\spin.c" />
     <ClCompile Include="..\sync.c" />
     <ClCompile Include="..\tsd.c" />
-    <ClCompile Include="..\nonportable.c" />
-    <ClCompile Include="..\errno.c" />
-    <ClCompile Include="..\signal.c" />
-    <ClCompile Include="..\autostatic.c">
-    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\implement.h" />
+    <ClInclude Include="..\need_errno.h" />
+    <ClInclude Include="..\pthread.h" />
+    <ClInclude Include="..\semaphore.h" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
diff --git a/pthreads/COPKG/pthreads.vcxproj.filters b/pthreads/COPKG/pthreads.vcxproj.filters
index df9d00a3f..f28de9c57 100644
--- a/pthreads/COPKG/pthreads.vcxproj.filters
+++ b/pthreads/COPKG/pthreads.vcxproj.filters
@@ -7,7 +7,7 @@
     </Filter>
     <Filter Include="Header Files">
       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
     </Filter>
     <Filter Include="Resource Files">
       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
@@ -18,6 +18,9 @@
     <ClCompile Include="..\attr.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\autostatic.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\barrier.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -36,6 +39,9 @@
     <ClCompile Include="..\dll.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\errno.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\exit.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -51,6 +57,9 @@
     <ClCompile Include="..\mutex.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\nonportable.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\private.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -63,6 +72,9 @@
     <ClCompile Include="..\semaphore.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\signal.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\spin.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -72,24 +84,18 @@
     <ClCompile Include="..\tsd.c">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="..\nonportable.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\errno.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\signal.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\autostatic.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="..\implement.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\need_errno.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="..\pthread.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\implement.h">
+    <ClInclude Include="..\semaphore.h">
       <Filter>Header Files</Filter>
     </ClInclude>
   </ItemGroup>
diff --git a/pthreads/COPKG/pthreads.vcxproj.user b/pthreads/COPKG/pthreads.vcxproj.user
deleted file mode 100644
index abe8dd896..000000000
--- a/pthreads/COPKG/pthreads.vcxproj.user
+++ /dev/null
@@ -1,4 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <PropertyGroup />
-</Project>
\ No newline at end of file
diff --git a/ptools/ptools.vcxproj b/ptools/ptools.vcxproj
index fe78ce86c..452735c9d 100644
--- a/ptools/ptools.vcxproj
+++ b/ptools/ptools.vcxproj
@@ -10,33 +10,8 @@
       <Platform>x64</Platform>
     </ProjectConfiguration>
   </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="m2convertrep.cc" />
-    <ClCompile Include="m2pythontools.cc" />
-    <ClCompile Include="m2ser.cc" />
-    <ClCompile Include="m2streamdataenc.cc" />
-    <ClCompile Include="midassocket.cc" />
-    <ClCompile Include="timeconv.cc" />
-    <ClCompile Include="valprotocol2.cc" />
-    <ClCompile Include="valpython.cc" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="chooseser.h" />
-    <ClInclude Include="jsonprint.h" />
-    <ClInclude Include="m2convertrep.h" />
-    <ClInclude Include="m2pmstack.h" />
-    <ClInclude Include="m2pythontools.h" />
-    <ClInclude Include="m2ser.h" />
-    <ClInclude Include="m2streamdataenc.h" />
-    <ClInclude Include="midassocket.h" />
-    <ClInclude Include="midasyeller.h" />
-    <ClInclude Include="serialization.h" />
-    <ClInclude Include="timeconv.h" />
-    <ClInclude Include="valpickleloader.h" />
-    <ClInclude Include="valprotocol2.h" />
-    <ClInclude Include="valpython.h" />
-  </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{3F07B371-1B81-477E-886C-0E079B0A6803}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>ptools</RootNamespace>
@@ -59,7 +34,8 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
-  <ImportGroup Label="Shared" />
+  <ImportGroup Label="Shared">
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
@@ -67,27 +43,16 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Linkage-pthreads>static</Linkage-pthreads>
-    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Linkage-pthreads>static</Linkage-pthreads>
-    <OutDir>$(SolutionDir)\$(Platform)\$(Configuration)\</OutDir>
-    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
+      <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_DEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -95,17 +60,12 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;NDEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -113,5 +73,27 @@
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="m2convertrep.cc" />
+    <ClCompile Include="m2pythontools.cc" />
+    <ClCompile Include="m2ser.cc" />
+    <ClCompile Include="m2streamdataenc.cc" />
+    <ClCompile Include="midassocket.cc" />
+    <ClCompile Include="timeconv.cc" />
+    <ClCompile Include="valprotocol2.cc" />
+    <ClCompile Include="valpython.cc" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="m2convertrep.h" />
+    <ClInclude Include="m2pythontools.h" />
+    <ClInclude Include="m2ser.h" />
+    <ClInclude Include="m2streamdataenc.h" />
+    <ClInclude Include="midassocket.h" />
+    <ClInclude Include="timeconv.h" />
+    <ClInclude Include="valprotocol2.h" />
+    <ClInclude Include="valpython.h" />
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
 </Project>
\ No newline at end of file
diff --git a/ptools/ptools.vcxproj.filters b/ptools/ptools.vcxproj.filters
new file mode 100644
index 000000000..bf6f7486d
--- /dev/null
+++ b/ptools/ptools.vcxproj.filters
@@ -0,0 +1,69 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="m2convertrep.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="m2pythontools.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="m2ser.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="m2streamdataenc.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="midassocket.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="timeconv.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="valprotocol2.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="valpython.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="m2convertrep.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="m2pythontools.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="m2ser.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="m2streamdataenc.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="midassocket.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="timeconv.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="valprotocol2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="valpython.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/vmaf.sln b/vmaf.sln
index ab12c1272..f21121ec8 100644
--- a/vmaf.sln
+++ b/vmaf.sln
@@ -3,17 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio 14
 VisualStudioVersion = 14.0.25420.1
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmafossexec", "wrapper\wrapper.vcxproj", "{C2D3FD1E-9068-494D-9655-88CE906B4C8B}"
-	ProjectSection(ProjectDependencies) = postProject
-		{3F07B371-1B81-477E-886C-0E079B0A6803} = {3F07B371-1B81-477E-886C-0E079B0A6803}
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}
-		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmafossexec", "wrapper\vmafossexec.vcxproj", "{C2D3FD1E-9068-494D-9655-88CE906B4C8B}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ptools", "ptools\ptools.vcxproj", "{3F07B371-1B81-477E-886C-0E079B0A6803}"
-	ProjectSection(ProjectDependencies) = postProject
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}
-	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pthreads", "pthreads\COPKG\pthreads.vcxproj", "{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}"
 EndProject
@@ -21,31 +13,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libvmaf", "wrapper\libvmaf.
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moment", "feature\vs2015\moment\moment.vcxproj", "{D67BDBF2-D42F-465D-BABD-A381BFFAA373}"
-	ProjectSection(ProjectDependencies) = postProject
-		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moment", "feature\msvc\moment.vcxproj", "{D67BDBF2-D42F-465D-BABD-A381BFFAA373}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ms_ssim", "feature\vs2015\ms_ssim\ms_ssim.vcxproj", "{CF8FA427-306B-4803-9F23-31C229A630B6}"
-	ProjectSection(ProjectDependencies) = postProject
-		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ms_ssim", "feature\msvc\ms_ssim.vcxproj", "{CF8FA427-306B-4803-9F23-31C229A630B6}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "psnr", "feature\vs2015\psnr\psnr.vcxproj", "{2DC3E418-09C6-49F4-A8DA-04C614D6016D}"
-	ProjectSection(ProjectDependencies) = postProject
-		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "psnr", "feature\msvc\psnr.vcxproj", "{2DC3E418-09C6-49F4-A8DA-04C614D6016D}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ssim", "feature\vs2015\ssim\ssim.vcxproj", "{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}"
-	ProjectSection(ProjectDependencies) = postProject
-		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ssim", "feature\msvc\ssim.vcxproj", "{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmaf", "feature\vs2015\vmaf\vmaf.vcxproj", "{054010E9-3004-4C24-B0F3-DCCE36D6B436}"
-	ProjectSection(ProjectDependencies) = postProject
-		{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}
-		{5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1}
-	EndProjectSection
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmaf", "feature\msvc\vmaf.vcxproj", "{054010E9-3004-4C24-B0F3-DCCE36D6B436}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/wrapper/libvmaf.vcxproj b/wrapper/libvmaf.vcxproj
index 40500db15..1522b118b 100644
--- a/wrapper/libvmaf.vcxproj
+++ b/wrapper/libvmaf.vcxproj
@@ -11,6 +11,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{5F5103F4-E473-4476-8E7B-FD3465E872B1}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>libvmaf</RootNamespace>
@@ -42,17 +43,15 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
+      <AdditionalIncludeDirectories>..\feature\src;..\feature\src\common;..\pthreads;..\ptools;..\ptools\opencontainers_1_8_4\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <SDLCheck>true</SDLCheck>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>MULTI_THREADING;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_DEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
     </ClCompile>
     <Link>
@@ -61,17 +60,12 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
+      <AdditionalIncludeDirectories>..\feature\src;..\feature\src\common;..\pthreads;..\ptools;..\ptools\opencontainers_1_8_4\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <SDLCheck>true</SDLCheck>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <PreprocessorDefinitions>MULTI_THREADING;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;NDEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -79,9 +73,6 @@
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
-  <ItemGroup>
-    <Text Include="ReadMe.txt" />
-  </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\feature\src\adm.c" />
     <ClCompile Include="..\feature\src\adm_tools.c" />
@@ -92,7 +83,10 @@
     <ClCompile Include="..\feature\src\common\alloc.c" />
     <ClCompile Include="..\feature\src\common\blur_array.c" />
     <ClCompile Include="..\feature\src\common\convolution.c" />
-    <ClCompile Include="..\feature\src\common\convolution_avx.c" />
+    <ClCompile Include="..\feature\src\common\convolution_avx.c">
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
     <ClCompile Include="..\feature\src\common\cpu.c" />
     <ClCompile Include="..\feature\src\common\file_io.c" />
     <ClCompile Include="..\feature\src\common\frame.c" />
@@ -119,6 +113,7 @@
   <ItemGroup>
     <ClInclude Include="..\feature\src\adm_options.h" />
     <ClInclude Include="..\feature\src\adm_tools.h" />
+    <ClInclude Include="..\feature\src\all_options.h" />
     <ClInclude Include="..\feature\src\ansnr_options.h" />
     <ClInclude Include="..\feature\src\ansnr_tools.h" />
     <ClInclude Include="..\feature\src\common\alignment.h" />
@@ -138,6 +133,7 @@
     <ClInclude Include="..\feature\src\iqa\iqa_os.h" />
     <ClInclude Include="..\feature\src\iqa\math_utils.h" />
     <ClInclude Include="..\feature\src\iqa\ssim_tools.h" />
+    <ClInclude Include="..\feature\src\moment_options.h" />
     <ClInclude Include="..\feature\src\motion_options.h" />
     <ClInclude Include="..\feature\src\motion_tools.h" />
     <ClInclude Include="..\feature\src\psnr_options.h" />
@@ -147,6 +143,7 @@
     <ClInclude Include="src\combo.h" />
     <ClInclude Include="src\cpu_info.h" />
     <ClInclude Include="src\darray.h" />
+    <ClInclude Include="src\debug.h" />
     <ClInclude Include="src\libvmaf.h" />
     <ClInclude Include="src\pugixml\pugiconfig.hpp" />
     <ClInclude Include="src\pugixml\pugixml.hpp" />
diff --git a/wrapper/libvmaf.vcxproj.filters b/wrapper/libvmaf.vcxproj.filters
index d128d8e5f..05f5670e9 100644
--- a/wrapper/libvmaf.vcxproj.filters
+++ b/wrapper/libvmaf.vcxproj.filters
@@ -26,9 +26,6 @@
       <UniqueIdentifier>{25a2d365-ffa6-4464-9712-344c46384a31}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-  <ItemGroup>
-    <Text Include="ReadMe.txt" />
-  </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\feature\src\all.c">
       <Filter>feature</Filter>
@@ -45,9 +42,6 @@
     <ClCompile Include="..\feature\src\ansnr_tools.c">
       <Filter>feature</Filter>
     </ClCompile>
-    <ClCompile Include="..\feature\src\common\blur_array.c">
-      <Filter>feature</Filter>
-    </ClCompile>
     <ClCompile Include="..\feature\src\moment.c">
       <Filter>feature</Filter>
     </ClCompile>
@@ -126,6 +120,9 @@
     <ClCompile Include="src\vmaf.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\feature\src\common\blur_array.c">
+      <Filter>feature\common</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\feature\src\adm_options.h">
@@ -140,9 +137,6 @@
     <ClInclude Include="..\feature\src\ansnr_tools.h">
       <Filter>feature</Filter>
     </ClInclude>
-    <ClInclude Include="..\feature\src\common\blur_array.h">
-      <Filter>feature</Filter>
-    </ClInclude>
     <ClInclude Include="..\feature\src\psnr_tools.h">
       <Filter>feature</Filter>
     </ClInclude>
@@ -236,5 +230,17 @@
     <ClInclude Include="src\vmaf.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\feature\src\common\blur_array.h">
+      <Filter>feature\common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\all_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="..\feature\src\moment_options.h">
+      <Filter>feature</Filter>
+    </ClInclude>
+    <ClInclude Include="src\debug.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/wrapper/wrapper.vcxproj b/wrapper/vmafossexec.vcxproj
similarity index 61%
rename from wrapper/wrapper.vcxproj
rename to wrapper/vmafossexec.vcxproj
index 4f3828896..04a70888b 100644
--- a/wrapper/wrapper.vcxproj
+++ b/wrapper/vmafossexec.vcxproj
@@ -11,11 +11,11 @@
     </ProjectConfiguration>
   </ItemGroup>
   <PropertyGroup Label="Globals">
+    <UseNativeEnvironment>true</UseNativeEnvironment>
     <ProjectGuid>{C2D3FD1E-9068-494D-9655-88CE906B4C8B}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
-    <RootNamespace>wrapper</RootNamespace>
+    <RootNamespace>vmafossexec</RootNamespace>
     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
-    <ProjectName>vmafossexec</ProjectName>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
@@ -34,7 +34,8 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
-  <ImportGroup Label="Shared" />
+  <ImportGroup Label="Shared">
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
@@ -42,31 +43,18 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-    <Linkage-pthreads>static</Linkage-pthreads>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-    <Linkage-pthreads>static</Linkage-pthreads>
-  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
+      <AdditionalIncludeDirectories>..\feature\src;..\feature\src\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>MULTI_THREADING;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
     <PostBuildEvent>
       <Message>copy examples.bat to output folder</Message>
@@ -75,26 +63,16 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
+      <AdditionalIncludeDirectories>..\feature\src;..\feature\src\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <WarningLevel>Level3</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <SDLCheck>true</SDLCheck>
-      <AdditionalIncludeDirectories>..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>MULTI_THREADING;NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
-      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
     <PostBuildEvent>
       <Message>copy examples.bat to output folder</Message>
@@ -105,9 +83,17 @@
     <ClCompile Include="src\main.cpp" />
   </ItemGroup>
   <ItemGroup>
+    <ProjectReference Include="..\pthreads\COPKG\pthreads.vcxproj">
+      <Project>{03a4c79f-f1a8-48c9-a2ac-0a14ec0f093e}</Project>
+    </ProjectReference>
     <ProjectReference Include="..\ptools\ptools.vcxproj">
       <Project>{3f07b371-1b81-477e-886c-0e079b0a6803}</Project>
     </ProjectReference>
+    <ProjectReference Include="libvmaf.vcxproj">
+      <Project>{5f5103f4-e473-4476-8e7b-fd3465e872b1}</Project>
+    </ProjectReference>
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
 </Project>
\ No newline at end of file
diff --git a/wrapper/vmafossexec.vcxproj.filters b/wrapper/vmafossexec.vcxproj.filters
new file mode 100644
index 000000000..7459f10e3
--- /dev/null
+++ b/wrapper/vmafossexec.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="src\main.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/wrapper/wrapper.vcxproj.filters b/wrapper/wrapper.vcxproj.filters
deleted file mode 100644
index 2dc63c25f..000000000
--- a/wrapper/wrapper.vcxproj.filters
+++ /dev/null
@@ -1,6 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <ClCompile Include="src\main.cpp" />
-  </ItemGroup>
-</Project>
\ No newline at end of file

From 012f51cb0667b40f0d5c872b828cf27bf5086266 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Tue, 29 Jan 2019 16:50:40 -0800
Subject: [PATCH 10/29] Optimization of ADM, VIF and buffer management modules
 (#288) (#289)

* Optimization of ADM, VIF and buffer management modules (#288)

* Format files; misc.
---
 feature/src/adm.c                         |  228 +++
 feature/src/adm_tools.c                   |  709 ++++++++
 feature/src/adm_tools.h                   |  124 ++
 feature/src/common/blur_array.c           |  141 +-
 feature/src/common/blur_array.h           |   26 +-
 feature/src/common/convolution.c          |    1 +
 feature/src/common/convolution.h          |    5 +
 feature/src/common/convolution_avx.c      | 1792 +++++++++++++++++++++
 feature/src/common/convolution_internal.h |   60 +
 feature/src/vif.c                         |   94 +-
 feature/src/vif_options.h                 |    6 +
 feature/src/vif_tools.c                   |  203 +++
 feature/src/vif_tools.h                   |    6 +
 wrapper/Makefile                          |   10 +-
 wrapper/src/combo.c                       |  207 ++-
 wrapper/src/combo.h                       |    3 +
 wrapper/src/darray.c                      |   16 +
 wrapper/src/libvmaf.h                     |    9 +
 wrapper/src/main.cpp                      |   64 +-
 wrapper/src/vmaf.cpp                      |    7 +
 20 files changed, 3676 insertions(+), 35 deletions(-)

diff --git a/feature/src/adm.c b/feature/src/adm.c
index 7e77ea3fa..1123d710c 100644
--- a/feature/src/adm.c
+++ b/feature/src/adm.c
@@ -39,6 +39,11 @@ typedef adm_dwt_band_t_s adm_dwt_band_t;
 #define adm_sum_cube  adm_sum_cube_s
 #define offset_image       offset_image_s
 
+#if ADM_OPT_ENABLE
+    #define adm_csf_den_scale adm_csf_den_scale_s
+    #define dwt2_src_indices_filt dwt2_src_indices_filt_s
+#endif
+
 static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one)
 {
     band->band_a = (float *)data_top; data_top += buf_sz_one;
@@ -48,6 +53,228 @@ static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_o
     return data_top;
 }
 
+#if ADM_OPT_ENABLE
+static char *init_dwt_band_hvd(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one)
+{
+	band->band_a = NULL;
+	band->band_h = (float *)data_top; data_top += buf_sz_one;
+	band->band_v = (float *)data_top; data_top += buf_sz_one;
+	band->band_d = (float *)data_top; data_top += buf_sz_one;
+	return data_top;
+}
+
+int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores, double border_factor)
+{
+#ifdef ADM_OPT_SINGLE_PRECISION
+	double numden_limit = 1e-2 * (w * h) / (1920.0 * 1080.0);
+#else
+	double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0);
+#endif
+	float *data_buf = 0;
+	char *data_top;
+
+	char *ind_buf_y = 0, *buf_y_orig = 0;
+	char *ind_buf_x = 0, *buf_x_orig = 0;
+	int *ind_y[4], *ind_x[4];
+
+	float *ref_scale;
+	float *dis_scale;
+
+	adm_dwt_band_t ref_dwt2;
+	adm_dwt_band_t dis_dwt2;
+
+	adm_dwt_band_t decouple_r;
+	adm_dwt_band_t decouple_a;
+
+	adm_dwt_band_t csf_r;
+	adm_dwt_band_t csf_a;
+	const float *curr_ref_scale = ref;
+	const float *curr_dis_scale = dis;
+	int curr_ref_stride = ref_stride;
+	int curr_dis_stride = dis_stride;
+
+	int orig_h = h;
+
+	int buf_stride = ALIGN_CEIL(((w + 1) / 2) * sizeof(float));
+	size_t buf_sz_one = (size_t)buf_stride * ((h + 1) / 2);
+
+	int ind_size_y = ALIGN_CEIL(((h + 1) / 2) * sizeof(int));
+	int ind_size_x = ALIGN_CEIL(((w + 1) / 2) * sizeof(int));
+
+	double num = 0;
+	double den = 0;
+
+	int scale;
+	int ret = 1;
+	
+	// Code optimized to save on multiple buffer copies 
+	// hence the reduction in the number of buffers required from 35 to 17 
+#define NUM_BUFS_ADM 17 
+	if (SIZE_MAX / buf_sz_one < NUM_BUFS_ADM)
+	{
+		printf("error: SIZE_MAX / buf_sz_one < NUM_BUFS_ADM, buf_sz_one = %zu.\n", buf_sz_one);
+		fflush(stdout);
+		goto fail;
+	}
+
+	if (!(data_buf = aligned_malloc(buf_sz_one * NUM_BUFS_ADM, MAX_ALIGN)))
+	{
+		printf("error: aligned_malloc failed for data_buf.\n");
+		fflush(stdout);
+		goto fail;
+	}
+
+	data_top = (char *)data_buf;
+
+	data_top = init_dwt_band(&ref_dwt2, data_top, buf_sz_one);
+	data_top = init_dwt_band(&dis_dwt2, data_top, buf_sz_one);
+	data_top = init_dwt_band_hvd(&decouple_r, data_top, buf_sz_one);
+	data_top = init_dwt_band_hvd(&decouple_a, data_top, buf_sz_one);
+	data_top = init_dwt_band_hvd(&csf_a, data_top, buf_sz_one);
+
+	if (!(buf_y_orig = aligned_malloc(ind_size_y * 4, MAX_ALIGN)))
+	{
+		printf("error: aligned_malloc failed for ind_buf_y.\n");
+		fflush(stdout);
+		goto fail;
+	}
+	ind_buf_y = buf_y_orig;
+	ind_y[0] = (int*)ind_buf_y; ind_buf_y += ind_size_y;
+	ind_y[1] = (int*)ind_buf_y; ind_buf_y += ind_size_y;
+	ind_y[2] = (int*)ind_buf_y; ind_buf_y += ind_size_y;
+	ind_y[3] = (int*)ind_buf_y; ind_buf_y += ind_size_y;
+
+	if (!(buf_x_orig = aligned_malloc(ind_size_x * 4, MAX_ALIGN)))
+	{
+		printf("error: aligned_malloc failed for ind_buf_x.\n");
+		fflush(stdout);
+		goto fail;
+	}
+	ind_buf_x = buf_x_orig;
+	ind_x[0] = (int*)ind_buf_x; ind_buf_x += ind_size_x;
+	ind_x[1] = (int*)ind_buf_x; ind_buf_x += ind_size_x;
+	ind_x[2] = (int*)ind_buf_x; ind_buf_x += ind_size_x;
+	ind_x[3] = (int*)ind_buf_x; ind_buf_x += ind_size_x;
+
+	for (scale = 0; scale < 4; ++scale) {
+#ifdef ADM_OPT_DEBUG_DUMP
+		char pathbuf[256];
+#endif
+		float num_scale = 0.0;
+		float den_scale = 0.0;
+	
+		dwt2_src_indices_filt(ind_y, ind_x, w, h);
+		adm_dwt2(curr_ref_scale, &ref_dwt2, ind_y, ind_x, w, h, curr_ref_stride, buf_stride);
+		adm_dwt2(curr_dis_scale, &dis_dwt2, ind_y, ind_x, w, h, curr_dis_stride, buf_stride);
+
+		w = (w + 1) / 2;
+		h = (h + 1) / 2;
+	
+		adm_decouple(&ref_dwt2, &dis_dwt2, &decouple_r, &decouple_a, w, h, buf_stride, buf_stride, buf_stride, buf_stride, border_factor);
+
+		den_scale = adm_csf_den_scale(&ref_dwt2, orig_h, scale, w, h, buf_stride, border_factor);
+
+		adm_csf(&decouple_a, &csf_a, orig_h, scale, w, h, buf_stride, buf_stride, border_factor);
+	
+		num_scale = adm_cm(&decouple_r, NULL, &csf_a, w, h, buf_stride, buf_stride, buf_stride, border_factor, scale);
+
+#ifdef ADM_OPT_DEBUG_DUMP
+		sprintf(pathbuf, "stage/ref[%d]_a.yuv", scale);
+		write_image(pathbuf, ref_dwt2.band_a, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/ref[%d]_h.yuv", scale);
+		write_image(pathbuf, ref_dwt2.band_h, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/ref[%d]_v.yuv", scale);
+		write_image(pathbuf, ref_dwt2.band_v, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/ref[%d]_d.yuv", scale);
+		write_image(pathbuf, ref_dwt2.band_d, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/dis[%d]_a.yuv", scale);
+		write_image(pathbuf, dis_dwt2.band_a, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/dis[%d]_h.yuv", scale);
+		write_image(pathbuf, dis_dwt2.band_h, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/dis[%d]_v.yuv", scale);
+		write_image(pathbuf, dis_dwt2.band_v, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/dis[%d]_d.yuv", scale);
+		write_image(pathbuf, dis_dwt2.band_d, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/r[%d]_h.yuv", scale);
+		write_image(pathbuf, decouple_r.band_h, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/r[%d]_v.yuv", scale);
+		write_image(pathbuf, decouple_r.band_v, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/r[%d]_d.yuv", scale);
+		write_image(pathbuf, decouple_r.band_d, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/a[%d]_h.yuv", scale);
+		write_image(pathbuf, decouple_a.band_h, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/a[%d]_v.yuv", scale);
+		write_image(pathbuf, decouple_a.band_v, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/a[%d]_d.yuv", scale);
+		write_image(pathbuf, decouple_a.band_d, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/csf_a[%d]_h.yuv", scale);
+		write_image(pathbuf, csf_a.band_h, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/csf_a[%d]_v.yuv", scale);
+		write_image(pathbuf, csf_a.band_v, w, h, buf_stride, sizeof(float));
+
+		sprintf(pathbuf, "stage/csf_a[%d]_d.yuv", scale);
+		write_image(pathbuf, csf_a.band_d, w, h, buf_stride, sizeof(float));
+
+#endif
+	
+		num += num_scale;
+		den += den_scale;
+
+		ref_scale = ref_dwt2.band_a;
+		dis_scale = dis_dwt2.band_a;
+
+		curr_ref_scale = ref_scale;
+		curr_dis_scale = dis_scale;
+
+		curr_ref_stride = buf_stride;
+		curr_dis_stride = buf_stride;
+
+#ifdef ADM_OPT_DEBUG_DUMP
+		PRINTF("num: %f\n", num);
+		PRINTF("den: %f\n", den);
+#endif
+		scores[2 * scale + 0] = num_scale;
+		scores[2 * scale + 1] = den_scale;
+	}
+
+	num = num < numden_limit ? 0 : num;
+	den = den < numden_limit ? 0 : den;
+
+	if (den == 0.0)
+	{
+		*score = 1.0f;
+	}
+	else
+	{
+		*score = num / den;
+	}
+	*score_num = num;
+	*score_den = den;
+
+	ret = 0;
+
+fail:
+	aligned_free(data_buf);
+	aligned_free(buf_y_orig);
+	aligned_free(buf_x_orig);
+	return ret;
+}
+#else // ADM_OPT_ENABLE
 int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores, double border_factor)
 {
 #ifdef ADM_OPT_SINGLE_PRECISION
@@ -273,6 +500,7 @@ int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride
     aligned_free(data_buf);
     return ret;
 }
+#endif
 
 int adm(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, int w, int h, const char *fmt)
 {
diff --git a/feature/src/adm_tools.c b/feature/src/adm_tools.c
index 94c9116c2..5cb57aecd 100644
--- a/feature/src/adm_tools.c
+++ b/feature/src/adm_tools.c
@@ -48,6 +48,23 @@ static float rcp_s(float x)
 static const float dwt2_db2_coeffs_lo_s[4] = { 0.482962913144690, 0.836516303737469, 0.224143868041857, -0.129409522550921 };
 static const float dwt2_db2_coeffs_hi_s[4] = { -0.129409522550921, -0.224143868041857, 0.836516303737469, -0.482962913144690 };
 
+#if ADM_OPT_ENABLE
+#ifndef FLOAT_ONE_BY_30
+#define FLOAT_ONE_BY_30	0.0333333351
+#endif
+
+#ifndef FLOAT_ONE_BY_15
+#define FLOAT_ONE_BY_15 0.0666666701
+#endif
+
+static const float fcoeff_cm_thresh_s[3][3] =
+{
+	{ FLOAT_ONE_BY_30, FLOAT_ONE_BY_30, FLOAT_ONE_BY_30 },
+	{ FLOAT_ONE_BY_30, FLOAT_ONE_BY_15, FLOAT_ONE_BY_30 },
+	{ FLOAT_ONE_BY_30, FLOAT_ONE_BY_30, FLOAT_ONE_BY_30 }
+};
+#endif
+
 float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor)
 {
     int px_stride = stride / sizeof(float);
@@ -76,6 +93,124 @@ float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_fac
     return powf(accum, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
 }
 
+#if ADM_OPT_ENABLE
+void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor)
+{
+#ifdef ADM_OPT_AVOID_ATAN
+	const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);
+#endif
+	const float eps = 1e-30;
+
+	int ref_px_stride = ref_stride / sizeof(float);
+	int dis_px_stride = dis_stride / sizeof(float);
+	int r_px_stride = r_stride / sizeof(float);
+	int a_px_stride = a_stride / sizeof(float);
+	
+	/* The computation of the score is not required for the regions which lie outside the frame borders */
+	int left = w * border_factor - 0.5 - 1; // -1 for filter tap
+	int top = h * border_factor - 0.5 - 1;
+	int right = w - left + 2; // +2 for filter tap
+	int bottom = h - top + 2;
+
+	if (left < 0) {
+		left = 0;
+	}
+	if (right > w) {
+		right = w;
+	}
+	if (top < 0) {
+		top = 0;
+	}
+	if (bottom > h) {
+		bottom = h;
+	}
+
+	float oh, ov, od, th, tv, td;
+	float kh, kv, kd, tmph, tmpv, tmpd;
+#ifdef ADM_OPT_AVOID_ATAN
+	float ot_dp, o_mag_sq, t_mag_sq;
+#else
+	float oa, ta, diff;
+#endif
+	int angle_flag;
+	int i, j;
+
+	for (i = top; i < bottom; ++i) {
+		for (j = left; j < right; ++j) {
+			oh = ref->band_h[i * ref_px_stride + j];
+			ov = ref->band_v[i * ref_px_stride + j];
+			od = ref->band_d[i * ref_px_stride + j];
+			th = dis->band_h[i * dis_px_stride + j];
+			tv = dis->band_v[i * dis_px_stride + j];
+			td = dis->band_d[i * dis_px_stride + j];
+
+			kh = DIVS(th, oh + eps);
+			kv = DIVS(tv, ov + eps);
+			kd = DIVS(td, od + eps);
+
+			kh = kh < 0.0f ? 0.0f : (kh > 1.0f ? 1.0f : kh);
+			kv = kv < 0.0f ? 0.0f : (kv > 1.0f ? 1.0f : kv);
+			kd = kd < 0.0f ? 0.0f : (kd > 1.0f ? 1.0f : kd);
+
+			tmph = kh * oh;
+			tmpv = kv * ov;
+			tmpd = kd * od;
+#ifdef ADM_OPT_AVOID_ATAN
+			/* Determine if angle between (oh,ov) and (th,tv) is less than 1 degree.
+			 * Given that u is the vector (oh,ov) and v is the vector (th,tv), this can
+			 * be done by testing the inequality.
+			 *
+			 * { (u.v) >= 0 } AND { (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 }
+			 *
+			 * Proof:
+			 *
+			 * cos(theta) = (u.v) / (||u|| * ||v||)
+			 *
+			 * IF u.v >= 0 THEN
+			 *   cos(theta)^2 = (u.v)^2 / (||u||^2 * ||v||^2)
+			 *   (u.v)^2 = cos(theta)^2 * ||u||^2 * ||v||^2
+			 *
+			 *   IF |theta| < 1deg THEN
+			 *     (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2
+			 *   END
+			 * ELSE
+			 *   |theta| > 90deg
+			 * END
+			 */
+			ot_dp = oh * th + ov * tv;
+			o_mag_sq = oh * oh + ov * ov;
+			t_mag_sq = th * th + tv * tv;
+
+			angle_flag = (ot_dp >= 0.0f) && (ot_dp * ot_dp >= cos_1deg_sq * o_mag_sq * t_mag_sq);
+#else
+			oa = atanf(DIVS(ov, oh + eps));
+			ta = atanf(DIVS(tv, th + eps));
+
+			if (oh < 0.0f)
+				oa += (float)M_PI;
+			if (th < 0.0f)
+				ta += (float)M_PI;
+
+			diff = fabsf(oa - ta) * 180.0f / M_PI;
+			angle_flag = diff < 1.0f;
+#endif
+			if (angle_flag) {
+				tmph = th;
+				tmpv = tv;
+				tmpd = td;
+			}
+
+			r->band_h[i * r_px_stride + j] = tmph;
+			r->band_v[i * r_px_stride + j] = tmpv;
+			r->band_d[i * r_px_stride + j] = tmpd;
+
+			a->band_h[i * a_px_stride + j] = th - tmph;
+			a->band_v[i * a_px_stride + j] = tv - tmpv;
+			a->band_d[i * a_px_stride + j] = td - tmpd;
+		}
+	}
+}
+#else
 void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride)
 {
 #ifdef ADM_OPT_AVOID_ATAN
@@ -173,7 +308,59 @@ void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, co
         }
     }
 }
+#endif
 
+#if ADM_OPT_ENABLE
+void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride, double border_factor)
+{
+	const float *src_angles[3] = { src->band_h, src->band_v, src->band_d };
+	float *dst_angles[3] = { dst->band_h, dst->band_v, dst->band_d };
+
+	const float *src_ptr;
+	float *dst_ptr;
+
+	int src_px_stride = src_stride / sizeof(float);
+	int dst_px_stride = dst_stride / sizeof(float);
+
+	// for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from
+	// 1 to 4 (from finest scale to coarsest scale).
+	float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1);
+	float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2);
+	float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
+
+	/* The computation of the csf values is not required for the regions which lie outside the frame borders */
+	int left = w * border_factor - 0.5 - 1; // -1 for filter tap
+	int top = h * border_factor - 0.5 - 1;
+	int right = w - left + 2; // +2 for filter tap
+	int bottom = h - top + 2;
+
+	if (left < 0) {
+		left = 0;
+	}
+	if (right > w) {
+		right = w;
+	}
+	if (top < 0) {
+		top = 0;
+	}
+	if (bottom > h) {
+		bottom = h;
+	}
+
+	int i, j, theta;
+
+	for (theta = 0; theta < 3; ++theta) {
+		src_ptr = src_angles[theta];
+		dst_ptr = dst_angles[theta];
+
+		for (i = top; i < bottom; ++i) {
+			for (j = left; j < right; ++j) {
+				dst_ptr[i * dst_px_stride + j] = rfactor[theta] * src_ptr[i * src_px_stride + j];
+			}
+		}
+	}
+}
+#else
 void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride)
 {
     const float *src_angles[3] = { src->band_h, src->band_v, src->band_d };
@@ -204,7 +391,137 @@ void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int ori
         }
     }
 }
+#endif
 
+#if ADM_OPT_ENABLE
+/* Combination of adm_csf_s and adm_sum_cube_s for csf_o based den_scale */
+float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, int w, int h, int src_stride, double border_factor)
+{
+	float *src_h = src->band_h, *src_v = src->band_v, *src_d = src->band_d;
+
+	int src_px_stride = src_stride / sizeof(float);
+
+	// for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from
+	// 1 to 4 (from finest scale to coarsest scale).
+	float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1);
+	float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2);
+	float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
+
+	float accum_h = 0, accum_v = 0, accum_d = 0;
+	float accum_inner_h, accum_inner_v, accum_inner_d;
+	float den_scale_h, den_scale_v, den_scale_d;
+
+	float val;
+	
+	/* The computation of the denominator scales is not required for the regions which lie outside the frame borders */
+	int left = w * border_factor - 0.5;
+	int top = h * border_factor - 0.5;
+	int right = w - left;
+	int bottom = h - top;
+
+	int i, j;
+
+	for (i = top; i < bottom; ++i) {
+		accum_inner_h = 0;
+		accum_inner_v = 0;
+		accum_inner_d = 0;
+		src_h = src->band_h + i * src_px_stride;
+		src_v = src->band_v + i * src_px_stride;
+		src_d = src->band_d + i * src_px_stride;
+		for (j = left; j < right; ++j) {
+			float abs_csf_o_val_h = fabsf(rfactor[0] * src_h[j]);
+			float abs_csf_o_val_v = fabsf(rfactor[1] * src_v[j]);
+			float abs_csf_o_val_d = fabsf(rfactor[2] * src_d[j]);
+
+			val = abs_csf_o_val_h * abs_csf_o_val_h * abs_csf_o_val_h;
+			accum_inner_h += val;
+			val = abs_csf_o_val_v * abs_csf_o_val_v * abs_csf_o_val_v;
+			accum_inner_v += val;
+			val = abs_csf_o_val_d * abs_csf_o_val_d * abs_csf_o_val_d;
+			accum_inner_d += val;
+		}
+
+		accum_h += accum_inner_h;
+		accum_v += accum_inner_v;
+		accum_d += accum_inner_d;
+
+	}
+
+	den_scale_h = powf(accum_h, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
+	den_scale_v = powf(accum_v, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
+	den_scale_d = powf(accum_d, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
+
+	return(den_scale_h + den_scale_v + den_scale_d);
+
+}
+#endif
+
+#if ADM_OPT_ENABLE
+void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride)
+{
+	const float *angles[3] = { src->band_h, src->band_v, src->band_d };
+	const float *src_ptr;
+
+	int src_px_stride = src_stride / sizeof(float);
+	int dst_px_stride = dst_stride / sizeof(float);
+
+	float fcoeff, imgcoeff;
+
+	int theta, i, j, fi, fj, ii, jj;
+
+	/* i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 */
+	{
+		float accum;
+		ADM_CM_THRESH_S_0_0(angles, src_px_stride, &accum, w, h, 0, 0)
+			dst[0] = accum;
+	}
+
+	/* i = 0, j = w-1: indices y: 1,0,1, x: w-2, w-1, w-1 */
+	{
+		float accum;
+		ADM_CM_THRESH_S_0_W_M_1(angles, src_px_stride, &accum, w, h, 0, (w - 1))
+			dst[w - 1] = accum;
+	}
+
+	/* i = 0, j = 1, ..., w-2: indices y: 1,0,1, x: j-1,j,j+1 */
+	for (j = 1; j < (w - 1); ++j) {
+		float accum;
+		ADM_CM_THRESH_S_0_J(angles, src_px_stride, &accum, w, h, 0, j)
+			dst[j] = accum;
+	}
+
+	/* i = h-1, j = 0: indices y: h-2,h-1,h-1, x: 1,0,1 */
+	{
+		float accum;
+		ADM_CM_THRESH_S_H_M_1_0(angles, src_px_stride, &accum, w, h, (h - 1), 0)
+			dst[dst_px_stride*(h - 1)] = accum;
+	}
+
+	/* i = h-1, j = w-1: indices y: h-2,h-1,h-1, x: w-2, w-1, w-1 */
+	{
+		float accum;
+		ADM_CM_THRESH_S_H_M_1_W_M_1(angles, src_px_stride, &accum, w, h, (h - 1), (w - 1))
+			dst[dst_px_stride*(h - 1) + w - 1] = accum;
+	}
+
+	/* i = h-1, j = 1, ..., w-2: indices y: h-2,h-1,h-1, x: j-1,j,j+1 */
+	for (j = 1; j < (w - 1); ++j) {
+		float accum;
+		ADM_CM_THRESH_S_H_M_1_J(angles, src_px_stride, &accum, w, h, (h - 1), j)
+			dst[dst_px_stride*(h - 1) + j] = accum;
+	}
+
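+	/* i = 1,..,h-2, j = 1,..,w-2: indices y: i-1,i,i+1, x: j-1,j,j+1. NOTE(review): dst columns 0 and w-1 of rows 1..h-2 are not assigned in this function - confirm intended */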
+	for (i = 1; i < (h - 1); ++i) {
+		for (j = 1; j < (w - 1); ++j) {
+			float accum;
+			ADM_CM_THRESH_S_I_J(angles, src_px_stride, &accum, w, h, i, j)
+				dst[dst_px_stride*i + j] = accum;
+		}
+	}
+
+}
+#else
 void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride)
 {
     const float *angles[3] = { src->band_h, src->band_v, src->band_d };
@@ -257,7 +574,251 @@ void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int
         }
     }
 }
+#endif
 
+#if ADM_OPT_ENABLE
+float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const adm_dwt_band_t_s *csf_a, int w, int h, int src_stride, int dst_stride, int csf_a_stride, double border_factor, int scale)
+{
+	/* Take decouple_r as src and apply the adm_csf_s scaling (rfactor) to it here to get csf_r */
+	float *src_h = src->band_h, *src_v = src->band_v, *src_d = src->band_d;
+
+	// for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from
+	// 1 to 4 (from finest scale to coarsest scale).
+	float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1);
+	float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2);
+	float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
+
+	const float *angles[3] = { csf_a->band_h, csf_a->band_v, csf_a->band_d };
+
+	int src_px_stride = src_stride / sizeof(float);
+	int dst_px_stride = dst_stride / sizeof(float);
+	int csf_px_stride = csf_a_stride / sizeof(float);
+
+	float xh, xv, xd, thr;
+
+	float val;
+	float accum_h = 0, accum_v = 0, accum_d = 0;
+	float accum_inner_h, accum_inner_v, accum_inner_d;
+	float num_scale_h, num_scale_v, num_scale_d;
+	
+	/* The computation of the scales is not required for the regions which lie outside the frame borders */
+	int left = w * border_factor - 0.5;
+	int top = h * border_factor - 0.5;
+	int right = w - left;
+	int bottom = h - top;
+
+	int start_col = (left > 1) ? left : 1;
+	int end_col = (right < (w - 1)) ? right : (w - 1);
+	int start_row = (top > 1) ? top : 1;
+	int end_row = (bottom < (h - 1)) ? bottom : (h - 1);
+
+	int i, j;
+
+	/* i=0,j=0 */
+	accum_inner_h = 0;
+	accum_inner_v = 0;
+	accum_inner_d = 0;
+	if ((top <= 0) && (left <= 0))
+	{
+		xh = src->band_h[0] * rfactor[0];
+		xv = src->band_v[0] * rfactor[1];
+		xd = src->band_d[0] * rfactor[2];
+		ADM_CM_THRESH_S_0_0(angles, csf_px_stride, &thr, w, h, 0, 0);
+
+		xh = fabsf(xh) - thr;
+		xv = fabsf(xv) - thr;
+		xd = fabsf(xd) - thr;
+
+		xh = xh < 0.0f ? 0.0f : xh;
+		xv = xv < 0.0f ? 0.0f : xv;
+		xd = xd < 0.0f ? 0.0f : xd;
+
+		val = (xh * xh * xh);
+		accum_inner_h += val;
+		val = (xv * xv * xv);
+		accum_inner_v += val;
+		val = (xd * xd * xd);
+		accum_inner_d += val;
+
+	}
+
+	/* i=0,j=w-1 */
+	if ((top <= 0) && (right > (w - 1)))
+	{
+		xh = src->band_h[w - 1] * rfactor[0];
+		xv = src->band_v[w - 1] * rfactor[1];
+		xd = src->band_d[w - 1] * rfactor[2];
+		ADM_CM_THRESH_S_0_W_M_1(angles, csf_px_stride, &thr, w, h, 0, (w - 1));
+
+		xh = fabsf(xh) - thr;
+		xv = fabsf(xv) - thr;
+		xd = fabsf(xd) - thr;
+
+		xh = xh < 0.0f ? 0.0f : xh;
+		xv = xv < 0.0f ? 0.0f : xv;
+		xd = xd < 0.0f ? 0.0f : xd;
+
+		val = (xh * xh * xh);
+		accum_inner_h += val;
+		val = (xv * xv * xv);
+		accum_inner_v += val;
+		val = (xd * xd * xd);
+		accum_inner_d += val;
+
+	}
+
+	/* i=0, j */
+	if (top <= 0) {
+		for (j = start_col; j < end_col; ++j) {
+			xh = src->band_h[j] * rfactor[0];
+			xv = src->band_v[j] * rfactor[1];
+			xd = src->band_d[j] * rfactor[2];
+			ADM_CM_THRESH_S_0_J(angles, csf_px_stride, &thr, w, h, 0, j);
+
+			xh = fabsf(xh) - thr;
+			xv = fabsf(xv) - thr;
+			xd = fabsf(xd) - thr;
+
+			xh = xh < 0.0f ? 0.0f : xh;
+			xv = xv < 0.0f ? 0.0f : xv;
+			xd = xd < 0.0f ? 0.0f : xd;
+
+			val = (xh * xh * xh);
+			accum_inner_h += val;
+			val = (xv * xv * xv);
+			accum_inner_v += val;
+			val = (xd * xd * xd);
+			accum_inner_d += val;
+
+		}
+	}
+
+	accum_h += accum_inner_h;
+	accum_v += accum_inner_v;
+	accum_d += accum_inner_d;
+
+	accum_inner_h = 0;
+	accum_inner_v = 0;
+	accum_inner_d = 0;
+
+	/* i=h-1,j=0 */
+	if ((bottom > (h - 1)) && (left <= 0))
+	{
+		xh = src->band_h[(h - 1) * src_px_stride] * rfactor[0];
+		xv = src->band_v[(h - 1) * src_px_stride] * rfactor[1];
+		xd = src->band_d[(h - 1) * src_px_stride] * rfactor[2];
+		ADM_CM_THRESH_S_H_M_1_0(angles, csf_px_stride, &thr, w, h, (h - 1), 0);
+
+		xh = fabsf(xh) - thr;
+		xv = fabsf(xv) - thr;
+		xd = fabsf(xd) - thr;
+
+		xh = xh < 0.0f ? 0.0f : xh;
+		xv = xv < 0.0f ? 0.0f : xv;
+		xd = xd < 0.0f ? 0.0f : xd;
+
+		val = (xh * xh * xh);
+		accum_inner_h += val;
+		val = (xv * xv * xv);
+		accum_inner_v += val;
+		val = (xd * xd * xd);
+		accum_inner_d += val;
+
+	}
+
+	/* i=h-1,j=w-1 */
+	if ((bottom > (h - 1)) && (right > (w - 1)))
+	{
+		xh = src->band_h[(h - 1) * src_px_stride + w - 1] * rfactor[0];
+		xv = src->band_v[(h - 1) * src_px_stride + w - 1] * rfactor[1];
+		xd = src->band_d[(h - 1) * src_px_stride + w - 1] * rfactor[2];
+		ADM_CM_THRESH_S_H_M_1_W_M_1(angles, csf_px_stride, &thr, w, h, (h - 1), (w - 1));
+
+		xh = fabsf(xh) - thr;
+		xv = fabsf(xv) - thr;
+		xd = fabsf(xd) - thr;
+
+		xh = xh < 0.0f ? 0.0f : xh;
+		xv = xv < 0.0f ? 0.0f : xv;
+		xd = xd < 0.0f ? 0.0f : xd;
+
+		val = (xh * xh * xh);
+		accum_inner_h += val;
+		val = (xv * xv * xv);
+		accum_inner_v += val;
+		val = (xd * xd * xd);
+		accum_inner_d += val;
+
+	}
+
+	/* i=h-1,j */
+	if (bottom > (h - 1)) {
+		for (j = start_col; j < end_col; ++j) {
+			xh = src->band_h[(h - 1) * src_px_stride + j] * rfactor[0];
+			xv = src->band_v[(h - 1) * src_px_stride + j] * rfactor[1];
+			xd = src->band_d[(h - 1) * src_px_stride + j] * rfactor[2];
+			ADM_CM_THRESH_S_H_M_1_J(angles, csf_px_stride, &thr, w, h, (h - 1), j);
+
+			xh = fabsf(xh) - thr;
+			xv = fabsf(xv) - thr;
+			xd = fabsf(xd) - thr;
+
+			xh = xh < 0.0f ? 0.0f : xh;
+			xv = xv < 0.0f ? 0.0f : xv;
+			xd = xd < 0.0f ? 0.0f : xd;
+
+			val = (xh * xh * xh);
+			accum_inner_h += val;
+			val = (xv * xv * xv);
+			accum_inner_v += val;
+			val = (xd * xd * xd);
+			accum_inner_d += val;
+
+		}
+	}
+
+	accum_h += accum_inner_h;
+	accum_v += accum_inner_v;
+	accum_d += accum_inner_d;
+
+	for (i = start_row; i < end_row; ++i) {
+		accum_inner_h = 0;
+		accum_inner_v = 0;
+		accum_inner_d = 0;
+		for (j = start_col; j < end_col; ++j) {
+			xh = src->band_h[i * src_px_stride + j] * rfactor[0];
+			xv = src->band_v[i * src_px_stride + j] * rfactor[1];
+			xd = src->band_d[i * src_px_stride + j] * rfactor[2];
+			ADM_CM_THRESH_S_I_J(angles, csf_px_stride, &thr, w, h, i, j);
+
+			xh = fabsf(xh) - thr;
+			xv = fabsf(xv) - thr;
+			xd = fabsf(xd) - thr;
+
+			xh = xh < 0.0f ? 0.0f : xh;
+			xv = xv < 0.0f ? 0.0f : xv;
+			xd = xd < 0.0f ? 0.0f : xd;
+
+			val = (xh * xh * xh);
+			accum_inner_h += val;
+			val = (xv * xv * xv);
+			accum_inner_v += val;
+			val = (xd * xd * xd);
+			accum_inner_d += val;
+
+		}
+		accum_h += accum_inner_h;
+		accum_v += accum_inner_v;
+		accum_d += accum_inner_d;
+	}
+
+	num_scale_h = powf(accum_h, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
+	num_scale_v = powf(accum_v, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
+	num_scale_d = powf(accum_d, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
+
+	return (num_scale_h + num_scale_v + num_scale_d);
+}
+#else
 void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const float *thresh, int w, int h, int src_stride, int dst_stride, int thresh_stride)
 {
     int src_px_stride = src_stride / sizeof(float);
@@ -289,7 +850,154 @@ void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const fl
         }
     }
 }
+#endif
+
+#if ADM_OPT_ENABLE
+// This function precomputes the boundary-mirrored source row/column indices used in adm_dwt2_s and stores them in buffers, which reduces the control code cycles.
+void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h)
+{
+	int i, j;
+	int ind0, ind1, ind2, ind3;
+	/* Vertical pass */
+	for (i = 0; i < (h + 1) / 2; ++i) { /* Index = 2 * i - 1 + fi */
+		ind0 = 2 * i - 1;
+		ind0 = (ind0 < 0) ? -ind0 : ((ind0 >= h) ? (2 * h - ind0 - 1) : ind0);
+		ind1 = 2 * i;
+		if (ind1 >= h) {
+			ind1 = (2 * h - ind1 - 1);
+		}
+		ind2 = 2 * i + 1;
+		if (ind2 >= h) {
+			ind2 = (2 * h - ind2 - 1);
+		}
+		ind3 = 2 * i + 2;
+		if (ind3 >= h) {
+			ind3 = (2 * h - ind3 - 1);
+		}
+		src_ind_y[0][i] = ind0;
+		src_ind_y[1][i] = ind1;
+		src_ind_y[2][i] = ind2;
+		src_ind_y[3][i] = ind3;
+	}
+	/* Horizontal pass */
+	for (j = 0; j < (w + 1) / 2; ++j) { /* Index = 2 * j - 1 + fj */
+		ind0 = 2 * j - 1;
+		ind0 = (ind0 < 0) ? -ind0 : ((ind0 >= w) ? (2 * w - ind0 - 1) : ind0);
+		ind1 = 2 * j;
+		if (ind1 >= w) {
+			ind1 = (2 * w - ind1 - 1);
+		}
+		ind2 = 2 * j + 1;
+		if (ind2 >= w) {
+			ind2 = (2 * w - ind2 - 1);
+		}
+		ind3 = 2 * j + 2;
+		if (ind3 >= w) {
+			ind3 = (2 * w - ind3 - 1);
+		}
+		src_ind_x[0][j] = ind0;
+		src_ind_x[1][j] = ind1;
+		src_ind_x[2][j] = ind2;
+		src_ind_x[3][j] = ind3;
+	}
+}
 
+#endif
+
+#if ADM_OPT_ENABLE
+void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride)
+{
+	const float *filter_lo = dwt2_db2_coeffs_lo_s;
+	const float *filter_hi = dwt2_db2_coeffs_hi_s;
+	int fwidth = sizeof(dwt2_db2_coeffs_lo_s) / sizeof(float);
+
+	int src_px_stride = src_stride / sizeof(float);
+	int dst_px_stride = dst_stride / sizeof(float);
+
+	float *tmplo = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN);
+	float *tmphi = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN);
+	float fcoeff_lo, fcoeff_hi, imgcoeff;
+	float s0, s1, s2, s3;
+	float accum;
+
+	int i, j, fi, fj, ii, jj;
+	int j0, j1, j2, j3;
+
+	for (i = 0; i < (h + 1) / 2; ++i) {
+		/* Vertical pass. */
+		for (j = 0; j < w; ++j) {
+			s0 = src[ind_y[0][i] * src_px_stride + j];
+			s1 = src[ind_y[1][i] * src_px_stride + j];
+			s2 = src[ind_y[2][i] * src_px_stride + j];
+			s3 = src[ind_y[3][i] * src_px_stride + j];
+			
+			
+			accum = 0;
+			accum += filter_lo[0] * s0;
+			accum += filter_lo[1] * s1;
+			accum += filter_lo[2] * s2;
+			accum += filter_lo[3] * s3;
+			tmplo[j] = accum;
+			
+			accum = 0;
+			accum += filter_hi[0] * s0;
+			accum += filter_hi[1] * s1;
+			accum += filter_hi[2] * s2;
+			accum += filter_hi[3] * s3;
+			tmphi[j] = accum;
+		}
+
+		/* Horizontal pass (lo and hi). */
+		for (j = 0; j < (w + 1) / 2; ++j) {
+
+			j0 = ind_x[0][j];
+			j1 = ind_x[1][j];
+			j2 = ind_x[2][j];
+			j3 = ind_x[3][j];
+			s0 = tmplo[j0];
+			s1 = tmplo[j1];
+			s2 = tmplo[j2];
+			s3 = tmplo[j3];
+			
+			accum = 0;
+			accum += filter_lo[0] * s0;
+			accum += filter_lo[1] * s1;
+			accum += filter_lo[2] * s2;
+			accum += filter_lo[3] * s3;
+			dst->band_a[i * dst_px_stride + j] = accum;
+			
+			accum = 0;
+			accum += filter_hi[0] * s0;
+			accum += filter_hi[1] * s1;
+			accum += filter_hi[2] * s2;
+			accum += filter_hi[3] * s3;
+			dst->band_v[i * dst_px_stride + j] = accum;
+			s0 = tmphi[j0];
+			s1 = tmphi[j1];
+			s2 = tmphi[j2];
+			s3 = tmphi[j3];
+			
+			accum = 0;
+			accum += filter_lo[0] * s0;
+			accum += filter_lo[1] * s1;
+			accum += filter_lo[2] * s2;
+			accum += filter_lo[3] * s3;
+			dst->band_h[i * dst_px_stride + j] = accum;
+			
+			accum = 0;
+			accum += filter_hi[0] * s0;
+			accum += filter_hi[1] * s1;
+			accum += filter_hi[2] * s2;
+			accum += filter_hi[3] * s3;
+			dst->band_d[i * dst_px_stride + j] = accum;
+
+		}
+	}
+
+	aligned_free(tmplo);
+	aligned_free(tmphi);
+}
+#else
 void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int src_stride, int dst_stride)
 {
     const float *filter_lo = dwt2_db2_coeffs_lo_s;
@@ -391,6 +1099,7 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int
     aligned_free(tmplo);
     aligned_free(tmphi);
 }
+#endif
 
 void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride)
 {
diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h
index 45d2868d0..a9c93f591 100644
--- a/feature/src/adm_tools.h
+++ b/feature/src/adm_tools.h
@@ -25,6 +25,108 @@
 #ifndef ADM_TOOLS_H_
 #define ADM_TOOLS_H_
 
+#define ADM_OPT_ENABLE 1
+
+#if ADM_OPT_ENABLE
+// Top-left corner (i = 0, j = 0): mirrored 3x3 taps read rows y: 1,0,1 and columns x: 1,0,1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_0_0(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	*accum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[src_px_stride + 1]) + fabsf(src_ptr[src_px_stride])); /* image row 1: column 1 is counted twice (x: 1,0,1) */ \
+		float row2 = FLOAT_ONE_BY_30 * 2 * fabsf(src_ptr[1]) + FLOAT_ONE_BY_15 * fabsf(src_ptr[0]); /* image row 0: centre tap uses FLOAT_ONE_BY_15 */ \
+		float row3 = row1; /* third kernel row reads image row 1 again with the same weights */ \
+		*accum += (row1 + row2 + row3); \
+	} \
+}
+
+// Top-right corner (i = 0, j = w-1): mirrored 3x3 taps read rows y: 1,0,1 and columns x: w-2,w-1,w-1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_0_W_M_1(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	*accum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[src_px_stride + w - 1]) + fabsf(src_ptr[src_px_stride + w - 2])); /* image row 1: column w-1 is counted twice */ \
+		float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[w - 2]) + fabsf(src_ptr[w - 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[w - 1]); /* image row 0: centre tap (column w-1) uses FLOAT_ONE_BY_15 */ \
+		float row3 = row1; /* third kernel row reads image row 1 again with the same weights */ \
+		*accum += (row1 + row2 + row3); \
+	} \
+}
+
+// Top row, interior columns (i = 0, j = 1, ..., w-2): 3x3 taps read rows y: 1,0,1 and columns x: j-1,j,j+1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_0_J(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	*accum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		float row1 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + j - 1]) + fabsf(src_ptr[src_px_stride + j]) + fabsf(src_ptr[src_px_stride + j + 1])); /* image row 1: uniform FLOAT_ONE_BY_30 taps */ \
+		float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j + 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[j]); /* image row 0: centre tap uses FLOAT_ONE_BY_15 */ \
+		float row3 = row1; /* third kernel row reads image row 1 again with the same weights */ \
+		*accum += (row1 + row2 + row3); \
+	} \
+}
+
+// Bottom-left corner (i = h-1, j = 0): mirrored 3x3 taps read rows y: h-2,h-1,h-1 and columns x: 1,0,1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_H_M_1_0(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	*accum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		src_ptr += (src_px_stride * (h - 2)); /* src_ptr now addresses image row h-2 */ \
+		float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[1]) + fabsf(src_ptr[0])); /* image row h-2: column 1 is counted twice */ \
+		float row2 = FLOAT_ONE_BY_30 * 2 * fabsf(src_ptr[src_px_stride + 1]) + FLOAT_ONE_BY_15 * fabsf(src_ptr[src_px_stride]); /* image row h-1 as centre row: centre tap uses FLOAT_ONE_BY_15 */ \
+		float row3 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[src_px_stride + 1]) + fabsf(src_ptr[src_px_stride])); /* image row h-1 again as the mirrored outer row: uniform FLOAT_ONE_BY_30 taps, so it must not reuse row2 */ \
+		*accum += (row1 + row2 + row3); \
+	} \
+}
+
+// Bottom-right corner (i = h-1, j = w-1): mirrored 3x3 taps read rows y: h-2,h-1,h-1 and columns x: w-2,w-1,w-1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_H_M_1_W_M_1(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	*accum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		src_ptr += (src_px_stride * (h - 2)); /* src_ptr now addresses image row h-2 */ \
+		float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[w - 1]) + fabsf(src_ptr[w - 2])); /* image row h-2: column w-1 is counted twice */ \
+		float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + w - 2]) + fabsf(src_ptr[src_px_stride + w - 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[src_px_stride + w - 1]); /* image row h-1 as centre row */ \
+		float row3 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[src_px_stride + w - 1]) + fabsf(src_ptr[src_px_stride + w - 2])); /* image row h-1 again as the mirrored outer row: uniform FLOAT_ONE_BY_30 taps, so it must not reuse row2 */ \
+		*accum += (row1 + row2 + row3); \
+	} \
+}
+
+// Bottom row, interior columns (i = h-1, j = 1, ..., w-2): 3x3 taps read rows y: h-2,h-1,h-1 and columns x: j-1,j,j+1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_H_M_1_J(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	*accum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		src_ptr += (src_px_stride * (h - 2)); /* src_ptr now addresses image row h-2 */ \
+		float row1 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j]) + fabsf(src_ptr[j + 1])); /* image row h-2: uniform FLOAT_ONE_BY_30 taps */ \
+		float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + j - 1]) + fabsf(src_ptr[src_px_stride + j + 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[src_px_stride + j]); /* image row h-1 as centre row */ \
+		float row3 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + j - 1]) + fabsf(src_ptr[src_px_stride + j]) + fabsf(src_ptr[src_px_stride + j + 1])); /* image row h-1 again as the mirrored outer row: uniform FLOAT_ONE_BY_30 taps, so it must not reuse row2 */ \
+		*accum += (row1 + row2 + row3); \
+	} \
+}
+
+// Interior pixel (i = 1,..,h-2, j = 1,..,w-2): un-mirrored 3x3 taps on rows y: i-1,i,i+1 and columns x: j-1,j,j+1; *accum receives the weighted |value| sum over the 3 angle planes.
+#define ADM_CM_THRESH_S_I_J(angles,src_px_stride,accum,w,h,i,j) \
+{ \
+	float sum = 0; \
+	for (int theta = 0; theta < 3; ++theta) { \
+		float *src_ptr = angles[theta]; \
+		src_ptr += (src_px_stride * (i-1)); /* src_ptr -> image row i-1 */ \
+		float row1 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j]) + fabsf(src_ptr[j + 1])); \
+		src_ptr += src_px_stride; /* src_ptr -> image row i */ \
+		float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j + 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[j]); /* only the centre tap uses FLOAT_ONE_BY_15 */ \
+		src_ptr += src_px_stride; /* src_ptr -> image row i+1 */ \
+		float row3 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j]) + fabsf(src_ptr[j + 1])); \
+		sum += (row1 + row2 + row3); \
+	} \
+	*accum = sum; /* single store after all three planes are summed */ \
+}
+#endif
+
 typedef struct adm_dwt_band_t_s {
     float *band_a; /* Low-pass V + low-pass H. */
     float *band_v; /* Low-pass V + high-pass H. */
@@ -32,6 +134,26 @@ typedef struct adm_dwt_band_t_s {
     float *band_d; /* High-pass V + high-pass H. */
 } adm_dwt_band_t_s;
 
+#if ADM_OPT_ENABLE
+
+float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor);
+
+void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor);
+
+void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride, double border_factor);
+
+void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride);
+
+float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, int w, int h, int src_stride, double border_factor);
+
+float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const adm_dwt_band_t_s *csf_a, int w, int h, int src_stride, int dst_stride, int csf_a_stride, double border_factor, int scale);
+
+void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h);
+
+void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride);
+
+#else
+
 float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor);
 
 void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride);
@@ -46,6 +168,8 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int
 
 void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride);
 
+#endif
+
 /* ================= */
 /* Noise floor model */
 /* ================= */
diff --git a/feature/src/common/blur_array.c b/feature/src/common/blur_array.c
index 6352e82bc..0714e1a34 100644
--- a/feature/src/common/blur_array.c
+++ b/feature/src/common/blur_array.c
@@ -21,7 +21,9 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a
     {
         arr->blur_buf_array[i].frame_idx = -1;
         arr->blur_buf_array[i].blur_buf = aligned_malloc(size, alignement);
-
+#if BUF_OPT_ENABLE
+		arr->blur_buf_array[i].reference_count	= 0;
+#endif
         if (arr->blur_buf_array[i].blur_buf == 0)
             return 0;
 
@@ -39,6 +41,32 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a
  */
 float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
 {
+#if BUF_OPT_ENABLE
+    int array_length = arr->actual_length;
+    BLUR_BUF_STRUCT* s = arr->blur_buf_array;
+	float *ret = NULL;
+
+    pthread_mutex_lock(&arr->block);
+
+    for (int i = 0; i < array_length; i++)
+    {
+		if (s->frame_idx == search_frame_idx)
+		{
+			/* Increment reference counter */
+			s->reference_count++;
+			
+			ret = s->blur_buf;
+			break;
+		}
+
+		// next array item
+		s++;
+     }  
+
+    pthread_mutex_unlock(&arr->block);
+
+    return ret;
+#else
     // find item for the search_frame_idx
     while (1)
     {
@@ -61,7 +89,7 @@ float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
 
         pthread_mutex_unlock(&arr->block);
     }
-
+#endif
     return 0;
 }
 
@@ -99,7 +127,11 @@ int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf)
 /*
  * resets the slot in the array to -1 to indicate that the buffer can be used again
  */
+#if BUF_OPT_ENABLE
+int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx)
+#else
 int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
+#endif
 {
     int ret = 0;
     int array_length = arr->actual_length;
@@ -111,8 +143,20 @@ int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
     {
         if (s->frame_idx == search_frame_idx)
         {
+#if BUF_OPT_ENABLE
+			if(s->reference_count <= 0)
+			{
+				s->frame_idx = -1;
+				ret = 1;
+			}
+			else
+			{
+				ret = -1;
+			}
+#else
             s->frame_idx = -1;
             ret = 1;
+#endif			
             break;
         }
 
@@ -144,3 +188,96 @@ void free_blur_buf(BLUR_BUF_ARRAY* arr)
 
     pthread_mutex_destroy(&arr->block);
 }
+
+#if BUF_OPT_ENABLE
+/*
+ * Claims the first unused slot (frame_idx == -1) for frame_idx, bumps that
+ * slot's reference count and returns its buffer; returns NULL if none is free.
+ */
+float* get_free_blur_buf_slot(BLUR_BUF_ARRAY* arr, int frame_idx)
+{
+    const int slot_count = arr->actual_length;
+    BLUR_BUF_STRUCT* const slots = arr->blur_buf_array;
+    float *free_buf = NULL;
+    int idx = 0;
+
+    pthread_mutex_lock(&arr->block);
+
+    /* Scan under the lock until an unused slot turns up. */
+    while (idx < slot_count && slots[idx].frame_idx != -1)
+    {
+        idx++;
+    }
+
+    if (idx < slot_count)
+    {
+        /* Tag the slot with the frame and take a reference on it. */
+        slots[idx].frame_idx = frame_idx;
+        slots[idx].reference_count++;
+        free_buf = slots[idx].blur_buf;
+    }
+
+    pthread_mutex_unlock(&arr->block);
+
+    return free_buf;
+}
+
+/*
+ * Looks up frame_idx and reports its slot's current reference count (-1 when no slot holds that frame).
+ */
+int get_blur_buf_reference_count(BLUR_BUF_ARRAY* arr, int frame_idx)
+{
+    const int slot_count = arr->actual_length;
+    BLUR_BUF_STRUCT* const slots = arr->blur_buf_array;
+    int count = -1;
+    int idx;
+
+    pthread_mutex_lock(&arr->block);
+
+    /* Linear search under the lock; the first matching slot wins. */
+    for (idx = 0; idx < slot_count; idx++)
+    {
+        if (slots[idx].frame_idx != frame_idx)
+        {
+            continue;
+        }
+        count = slots[idx].reference_count;
+        break;
+    }
+
+    pthread_mutex_unlock(&arr->block);
+
+    return count;
+}
+
+/*
+ * Drops one reference from the first slot currently holding search_frame_idx.
+ * Returns 0 if such a slot was found, -1 otherwise.
+ */
+int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx)
+{
+    const int slot_count = arr->actual_length;
+    BLUR_BUF_STRUCT* const slots = arr->blur_buf_array;
+    int status = -1;
+    int idx = 0;
+
+    pthread_mutex_lock(&arr->block);
+
+    /* Walk the slots until the frame is found or the array is exhausted. */
+    while (status != 0 && idx < slot_count)
+    {
+        if (slots[idx].frame_idx == search_frame_idx)
+        {
+            slots[idx].reference_count--;
+            status = 0;
+        }
+        idx++;
+    }
+
+    pthread_mutex_unlock(&arr->block);
+
+    return status;
+}
+
+
+#endif
diff --git a/feature/src/common/blur_array.h b/feature/src/common/blur_array.h
index 6ca7a27d9..1209a590f 100644
--- a/feature/src/common/blur_array.h
+++ b/feature/src/common/blur_array.h
@@ -12,11 +12,19 @@
 #include "pthread.h"
 #include "alloc.h"
 
+#ifdef MULTI_THREADING
+#define BUF_OPT_ENABLE 1
+#else
+#define BUF_OPT_ENABLE 0
+#endif
+
 #define MAX_NUM_THREADS 128
 typedef struct
 {
     int frame_idx;
     float *blur_buf;
+    int reference_count;
+
 
 } BLUR_BUF_STRUCT;
 
@@ -31,12 +39,26 @@ typedef struct
 
 int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t alignement);
 
-float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx);
+#if BUF_OPT_ENABLE
 
-int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf);
+float* get_free_blur_buf_slot(BLUR_BUF_ARRAY* arr, int frame_idx);
+
+int get_blur_buf_reference_count(BLUR_BUF_ARRAY* arr, int frame_idx);
+
+int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx);
+
+int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx);
+
+#else
 
 int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx);
 
+#endif
+
+float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx);
+
+int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf);
+
 void free_blur_buf(BLUR_BUF_ARRAY* arr);
 
 #endif /* VMAF_FEATURE_SRC_BLUR_ARRAY_H_ */
diff --git a/feature/src/common/convolution.c b/feature/src/common/convolution.c
index e8691cb6d..dcea7b4ea 100644
--- a/feature/src/common/convolution.c
+++ b/feature/src/common/convolution.c
@@ -17,6 +17,7 @@
  */
 
 #include "alignment.h"
+#include "vif_options.h"
 #include "convolution.h"
 #include "convolution_internal.h"
 #include "cpu.h"
diff --git a/feature/src/common/convolution.h b/feature/src/common/convolution.h
index 396077d16..df7990018 100644
--- a/feature/src/common/convolution.h
+++ b/feature/src/common/convolution.h
@@ -37,4 +37,9 @@ void convolution_f32_c_s(const float *filter, int filter_width, const float *src
 
 void convolution_f32_avx_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride);
 
+#if VIF_OPT_ENABLE
+void convolution_f32_avx_sq_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride);
+
+void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float *src1, const float *src2, float *dst, float *tmp, int width, int height, int src1_stride, int src2_stride, int dst_stride);
+#endif
 #endif // CONVOLUTION_H_
diff --git a/feature/src/common/convolution_avx.c b/feature/src/common/convolution_avx.c
index c3251c436..44d22c504 100644
--- a/feature/src/common/convolution_avx.c
+++ b/feature/src/common/convolution_avx.c
@@ -18,6 +18,7 @@
 
 #include <immintrin.h>
 #include "alignment.h"
+#include "vif_options.h"
 #include "convolution.h"
 #include "convolution_internal.h"
 
@@ -28,6 +29,21 @@ FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_5(const float * RES
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
 
+#if VIF_OPT_ENABLE
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end);
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end);
+#endif
 FORCE_INLINE inline static void convolution_f32_avx_s_3x3_2d_scanline(const float * RESTRICT filter, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
 {
 	__m256 f00, f01, f02, f10, f11, f12, f20, f21, f22;
@@ -856,3 +872,1779 @@ void convolution_f32_avx_s(const float *filter, int filter_width, const float *s
 		break;
 	}
 }
+
+#if VIF_OPT_ENABLE
+// Horizontal pass over squared samples for one scanline: dst[j + radius] = sum_k filter[k] * src[j + k]^2.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_h_sq_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+	// N picks an unrolled kernel for 5, 9 or 17 taps; every other value takes the generic path below.
+	if (N == 5)
+	{
+		convolution_f32_avx_s_1d_h_sq_scanline_5(filter, filter_width, src, dst, j_end);
+	}
+	else if (N == 9)
+	{
+		convolution_f32_avx_s_1d_h_sq_scanline_9(filter, filter_width, src, dst, j_end);
+	}
+	else if (N == 17)
+	{
+		convolution_f32_avx_s_1d_h_sq_scanline_17(filter, filter_width, src, dst, j_end);
+	}
+	else {
+		// Generic path: taps are consumed in groups of up to 9 and accumulated into dst.
+		int radius = filter_width / 2;
+
+		for (int x = 0; x < filter_width; x += 9) {
+			__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+			f0 = _mm256_setzero_ps();
+			f1 = _mm256_setzero_ps();
+			f2 = _mm256_setzero_ps();
+			f3 = _mm256_setzero_ps();
+			f4 = _mm256_setzero_ps(); // zeroed like its siblings so no tap register is left indeterminate
+			f5 = _mm256_setzero_ps();
+			f6 = _mm256_setzero_ps();
+			f7 = _mm256_setzero_ps();
+			f8 = _mm256_setzero_ps();
+
+			switch (filter_width - x) { // cases fall through on purpose: broadcast every tap that remains in this group
+			default:
+				f8 = _mm256_broadcast_ss(filter + x + 8);
+			case 8:
+				f7 = _mm256_broadcast_ss(filter + x + 7);
+			case 7:
+				f6 = _mm256_broadcast_ss(filter + x + 6);
+			case 6:
+				f5 = _mm256_broadcast_ss(filter + x + 5);
+			case 5:
+				f4 = _mm256_broadcast_ss(filter + x + 4);
+			case 4:
+				f3 = _mm256_broadcast_ss(filter + x + 3);
+			case 3:
+				f2 = _mm256_broadcast_ss(filter + x + 2);
+			case 2:
+				f1 = _mm256_broadcast_ss(filter + x + 1);
+			case 1:
+				f0 = _mm256_broadcast_ss(filter + x + 0);
+			}
+
+			for (int j = 0; j < j_end; j += 8) {
+				__m256 accum = _mm256_setzero_ps();
+				__m256 sum0, sum1, sum2, sum3;
+				__m256 g;
+
+				sum0 = _mm256_setzero_ps();
+				sum1 = _mm256_setzero_ps();
+				sum2 = _mm256_setzero_ps();
+				sum3 = _mm256_setzero_ps();
+
+				switch (filter_width - x) { // same intentional fall-through: square each sample, then weight it by its tap
+				default:
+					g = _mm256_loadu_ps(src + j + x + 8);
+					g = _mm256_mul_ps(g, g);
+					sum0 = _mm256_mul_ps(f8, g);
+				case 8:
+					g = _mm256_loadu_ps(src + j + x + 7);
+					g = _mm256_mul_ps(g, g);
+					sum3 = _mm256_mul_ps(f7, g);
+				case 7:
+					g = _mm256_loadu_ps(src + j + x + 6);
+					g = _mm256_mul_ps(g, g);
+					sum2 = _mm256_mul_ps(f6, g);
+				case 6:
+					g = _mm256_loadu_ps(src + j + x + 5);
+					g = _mm256_mul_ps(g, g);
+					sum1 = _mm256_mul_ps(f5, g);
+				case 5:
+					g = _mm256_loadu_ps(src + j + x + 4);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f4, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				case 4:
+					g = _mm256_loadu_ps(src + j + x + 3);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f3, g);
+					sum3 = _mm256_add_ps(sum3, g);
+				case 3:
+					g = _mm256_loadu_ps(src + j + x + 2);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f2, g);
+					sum2 = _mm256_add_ps(sum2, g);
+				case 2:
+					g = _mm256_loadu_ps(src + j + x + 1);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f1, g);
+					sum1 = _mm256_add_ps(sum1, g);
+				case 1:
+					g = _mm256_loadu_ps(src + j + x + 0);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f0, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				}
+
+				sum0 = _mm256_add_ps(sum0, sum2);
+				sum1 = _mm256_add_ps(sum1, sum3);
+
+				sum0 = _mm256_add_ps(sum0, sum1);
+				accum = _mm256_add_ps(accum, sum0);
+
+				if (x) // later tap groups add onto the partial sum already stored in dst
+					accum = _mm256_add_ps(accum, _mm256_loadu_ps(dst + j + radius));
+
+				_mm256_storeu_ps(dst + j + radius, accum);
+			}
+		}
+
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	// 17-tap horizontal pass over squared samples: dst[j + 8] = sum_k filter[k] * src[j + k]^2, built in two sweeps; filter_width is not read here.
+	// Evaluate filter taps 0-8
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) { // 8 outputs per iteration
+		__m256 accum = _mm256_setzero_ps();
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+
+		g = _mm256_loadu_ps(src + j + 0);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src + j + 1);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src + j + 2);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src + j + 3);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src + j + 4);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_loadu_ps(src + j + 5);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_loadu_ps(src + j + 6);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_loadu_ps(src + j + 7);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_loadu_ps(src + j + 8);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		accum = _mm256_add_ps(accum, sum0);
+
+		_mm256_store_ps(dst + j + 8, accum); // radius = 8; aligned store -- NOTE(review): assumes dst + 8 floats is 32-byte aligned, confirm against callers
+	}
+
+	// Evaluate filter taps 9-16 and add them onto the partial sums stored by the first sweep
+	f0 = _mm256_broadcast_ss(filter + 9);
+	f1 = _mm256_broadcast_ss(filter + 10);
+	f2 = _mm256_broadcast_ss(filter + 11);
+	f3 = _mm256_broadcast_ss(filter + 12);
+	f4 = _mm256_broadcast_ss(filter + 13);
+	f5 = _mm256_broadcast_ss(filter + 14);
+	f6 = _mm256_broadcast_ss(filter + 15);
+	f7 = _mm256_broadcast_ss(filter + 16);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+
+		float *dst_ptr = dst + j + 8; // radius = 8
+
+		g = _mm256_loadu_ps(src + j + 9);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src + j + 10);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src + j + 11);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src + j + 12);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src + j + 13);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_loadu_ps(src + j + 14);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_loadu_ps(src + j + 15);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_loadu_ps(src + j + 16);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+
+		sum0 = _mm256_add_ps(_mm256_load_ps(dst_ptr), sum0); // read back the taps 0-8 partial sum
+		_mm256_store_ps(dst_ptr, sum0);
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	// 9-tap horizontal pass over squared samples: dst[j + 4] = sum_k filter[k] * src[j + k]^2; filter_width is not read here.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) { // 8 outputs per iteration
+		__m256 accum = _mm256_setzero_ps();
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+
+		g = _mm256_loadu_ps(src + j + 0);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src + j + 1);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src + j + 2);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src + j + 3);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src + j + 4);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_loadu_ps(src + j + 5);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_loadu_ps(src + j + 6);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_loadu_ps(src + j + 7);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_loadu_ps(src + j + 8);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		accum = _mm256_add_ps(accum, sum0);
+
+		_mm256_storeu_ps(dst + j + 4, accum); // radius = 4; unaligned store
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4;
+	// 5-tap horizontal pass over squared samples: dst[j + 2] = sum_k filter[k] * src[j + k]^2; filter_width is not read here.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+
+	for (int j = 0; j < j_end; j += 8) { // 8 outputs per iteration
+		__m256 accum = _mm256_setzero_ps();
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+
+		g = _mm256_loadu_ps(src + j + 0);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src + j + 1);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src + j + 2);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src + j + 3);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src + j + 4);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		accum = _mm256_add_ps(accum, sum0);
+
+		_mm256_storeu_ps(dst + j + 2, accum); // radius = 2; unaligned store
+	}
+}
+
+// Vertical pass over squared samples for one scanline: dst[j] = sum_k filter[k] * src[(k - radius) * src_stride + j]^2.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_v_sq_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+	// N picks an unrolled kernel for 5, 9 or 17 taps; every other value takes the generic path below.
+	if (N == 5)
+	{
+		convolution_f32_avx_s_1d_v_sq_scanline_5(filter, filter_width, src, dst, src_stride, j_end);
+	}
+	else if (N == 9)
+	{
+		convolution_f32_avx_s_1d_v_sq_scanline_9(filter, filter_width, src, dst, src_stride, j_end);
+	}
+	else if (N == 17)
+	{
+		convolution_f32_avx_s_1d_v_sq_scanline_17(filter, filter_width, src, dst, src_stride, j_end);
+	}
+	else {
+		// Generic path: taps are consumed in groups of up to 9 rows and accumulated into dst.
+		int radius = filter_width / 2;
+		src -= radius * src_stride; // step back to the first row the filter covers
+
+		for (int y = 0; y < filter_width; y += 9) {
+			__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+			f0 = _mm256_setzero_ps();
+			f1 = _mm256_setzero_ps();
+			f2 = _mm256_setzero_ps();
+			f3 = _mm256_setzero_ps();
+			f4 = _mm256_setzero_ps(); // zeroed like its siblings so no tap register is left indeterminate
+			f5 = _mm256_setzero_ps();
+			f6 = _mm256_setzero_ps();
+			f7 = _mm256_setzero_ps();
+			f8 = _mm256_setzero_ps();
+
+			switch (filter_width - y) { // cases fall through on purpose: broadcast every tap that remains in this group
+			default:
+				f8 = _mm256_broadcast_ss(filter + y + 8);
+			case 8:
+				f7 = _mm256_broadcast_ss(filter + y + 7);
+			case 7:
+				f6 = _mm256_broadcast_ss(filter + y + 6);
+			case 6:
+				f5 = _mm256_broadcast_ss(filter + y + 5);
+			case 5:
+				f4 = _mm256_broadcast_ss(filter + y + 4);
+			case 4:
+				f3 = _mm256_broadcast_ss(filter + y + 3);
+			case 3:
+				f2 = _mm256_broadcast_ss(filter + y + 2);
+			case 2:
+				f1 = _mm256_broadcast_ss(filter + y + 1);
+			case 1:
+				f0 = _mm256_broadcast_ss(filter + y + 0);
+			}
+
+			for (int j = 0; j < j_end; j += 8) {
+				__m256 accum = _mm256_setzero_ps();
+				__m256 sum0, sum1, sum2, sum3;
+				__m256 g;
+
+				sum0 = _mm256_setzero_ps();
+				sum1 = _mm256_setzero_ps();
+				sum2 = _mm256_setzero_ps();
+				sum3 = _mm256_setzero_ps();
+
+				switch (filter_width - y) { // same intentional fall-through: square each sample, then weight it by its tap
+				default:
+					g = _mm256_load_ps(src + (y + 8) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					sum0 = _mm256_mul_ps(f8, g);
+				case 8:
+					g = _mm256_load_ps(src + (y + 7) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					sum3 = _mm256_mul_ps(f7, g);
+				case 7:
+					g = _mm256_load_ps(src + (y + 6) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					sum2 = _mm256_mul_ps(f6, g);
+				case 6:
+					g = _mm256_load_ps(src + (y + 5) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					sum1 = _mm256_mul_ps(f5, g);
+				case 5:
+					g = _mm256_load_ps(src + (y + 4) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f4, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				case 4:
+					g = _mm256_load_ps(src + (y + 3) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f3, g);
+					sum3 = _mm256_add_ps(sum3, g);
+				case 3:
+					g = _mm256_load_ps(src + (y + 2) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f2, g);
+					sum2 = _mm256_add_ps(sum2, g);
+				case 2:
+					g = _mm256_load_ps(src + (y + 1) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f1, g);
+					sum1 = _mm256_add_ps(sum1, g);
+				case 1:
+					g = _mm256_load_ps(src + (y + 0) * src_stride + j);
+					g = _mm256_mul_ps(g, g);
+					g = _mm256_mul_ps(f0, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				}
+
+				sum0 = _mm256_add_ps(sum0, sum2);
+				sum1 = _mm256_add_ps(sum1, sum3);
+
+				sum0 = _mm256_add_ps(sum0, sum1);
+				accum = _mm256_add_ps(accum, sum0);
+
+				if (y) // later tap groups add onto the partial sum already stored in dst
+					accum = _mm256_add_ps(accum, _mm256_load_ps(dst + j));
+
+				_mm256_store_ps(dst + j, accum);
+			}
+		}
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	src -= 8 * src_stride; // radius = 8: step back to the first of the 17 rows read below
+	// Vertical 17-tap filter of the squared input for one output row, 8 columns per iteration:
+	// dst[j] = sum over k = 0..16 of filter[k] * src[k * src_stride + j]^2 (src as adjusted above). filter_width is not read. Taps 0-8 first.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+		// _mm256_load_ps / _mm256_store_ps are the aligned forms: every row address at column j and dst + j have to be 32-byte aligned.
+		g = _mm256_load_ps(src + 0 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src + 1 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src + 2 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src + 3 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src + 4 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_load_ps(src + 5 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_load_ps(src + 6 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_load_ps(src + 7 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_load_ps(src + 8 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+		// Fold the four independent accumulators into sum0.
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// This pass overwrites dst; the second loop below adds the remaining taps to it.
+		_mm256_store_ps(dst + j, sum0);
+	}
+
+	// Second pass: filter taps 9-16 (rows 9-16 of the adjusted src); f0..f7 are reused for these eight coefficients.
+	f0 = _mm256_broadcast_ss(filter + 9);
+	f1 = _mm256_broadcast_ss(filter + 10);
+	f2 = _mm256_broadcast_ss(filter + 11);
+	f3 = _mm256_broadcast_ss(filter + 12);
+	f4 = _mm256_broadcast_ss(filter + 13);
+	f5 = _mm256_broadcast_ss(filter + 14);
+	f6 = _mm256_broadcast_ss(filter + 15);
+	f7 = _mm256_broadcast_ss(filter + 16);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+
+		g = _mm256_load_ps(src + 9 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src + 10 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src + 11 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src + 12 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src + 13 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_load_ps(src + 14 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_load_ps(src + 15 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_load_ps(src + 16 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// Add the partial result that the first pass left in dst.
+		sum0 = _mm256_add_ps(_mm256_load_ps(dst + j), sum0);
+		_mm256_store_ps(dst + j, sum0);
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	src -= 4 * src_stride; // radius = 4: step back to the first of the 9 rows read below
+	// Vertical 9-tap filter of the squared input for one output row, 8 columns per iteration:
+	// dst[j] = sum over k = 0..8 of filter[k] * src[k * src_stride + j]^2 (src as adjusted above). filter_width is not read.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+		// _mm256_load_ps / _mm256_store_ps are the aligned forms: every row address at column j and dst + j have to be 32-byte aligned.
+		g = _mm256_load_ps(src + 0 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src + 1 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src + 2 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src + 3 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src + 4 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_load_ps(src + 5 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_load_ps(src + 6 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_load_ps(src + 7 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_load_ps(src + 8 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+		// Fold the four independent accumulators into sum0.
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// All nine taps are covered in a single pass, so dst is simply overwritten.
+		_mm256_store_ps(dst + j, sum0);
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4;
+	src -= 2 * src_stride; // radius = 2: step back to the first of the 5 rows read below
+	// Vertical 5-tap filter of the squared input for one output row, 8 columns per iteration:
+	// dst[j] = sum over k = 0..4 of filter[k] * src[k * src_stride + j]^2 (src as adjusted above). Taps 0-4; filter_width is not read.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g;
+		// _mm256_load_ps / _mm256_store_ps are the aligned forms: every row address at column j and dst + j have to be 32-byte aligned.
+		g = _mm256_load_ps(src + 0 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src + 1 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src + 2 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src + 3 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src + 4 * src_stride + j);
+		g = _mm256_mul_ps(g, g);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+		// Fold the four accumulators (sum0 holds taps 0 and 4, the others one tap each) into sum0.
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+
+		_mm256_store_ps(dst + j, sum0);
+	}
+}
+
+void convolution_f32_avx_s_1d_sq(
+	int N, // forwarded unchanged to both scanline helpers
+	const float * RESTRICT filter, // coefficients, the same array is used for the vertical and the horizontal pass
+	int filter_width,
+	const float * RESTRICT src, // input plane, rows src_stride floats apart
+	float * RESTRICT dst, // output plane, rows dst_stride floats apart
+	float * RESTRICT tmp, // scratch plane, written with a row pitch of vmaf_ceiln(width, 8) floats
+	int width,
+	int height,
+	int src_stride,
+	int dst_stride)
+{
+	int radius = filter_width / 2;
+	int width_mod8 = vmaf_floorn(width, 8);
+	int tmp_stride = vmaf_ceiln(width, 8);
+	// Separable filter of src*src: the vertical pass squares and filters src into tmp, the horizontal pass filters tmp into dst.
+	int i_vec_end = height - radius > radius ? height - radius : radius; // never below radius, so the three row ranges below cannot overlap or begin at a negative row
+	int j_vec_end = width_mod8 - vmaf_ceiln(radius + 1, 8);
+	j_vec_end = j_vec_end > 0 ? j_vec_end : 0; // narrow planes have no vector columns; unclamped, the right-edge loop below would begin at a negative column
+	// Vertical pass. Rows closer than radius to the top or bottom go through the scalar edge routine; the extra bound keeps i inside the plane.
+	for (int i = 0; i < radius && i < height; ++i) {
+		for (int j = 0; j < width; ++j) {
+			tmp[i * tmp_stride + j] = convolution_edge_sq_s(false, filter, filter_width, src, width, height, src_stride, i, j);
+		}
+	}
+	for (int i = radius; i < i_vec_end; ++i) {
+		convolution_f32_avx_s_1d_v_sq_scanline(N, filter, filter_width, src + i * src_stride, tmp + i * tmp_stride, src_stride, width_mod8);
+		// Columns from width_mod8 up to width are finished by the scalar routine.
+		for (int j = width_mod8; j < width; ++j) {
+			tmp[i * tmp_stride + j] = convolution_edge_sq_s(false, filter, filter_width, src, width, height, src_stride, i, j);
+		}
+	}
+	for (int i = i_vec_end; i < height; ++i) {
+		for (int j = 0; j < width; ++j) {
+			tmp[i * tmp_stride + j] = convolution_edge_sq_s(false, filter, filter_width, src, width, height, src_stride, i, j);
+		}
+	}
+
+	// Horizontal pass. tmp already holds squared-and-filtered values, so the non-squaring helpers are used here.
+	for (int i = 0; i < height; ++i) {
+		for (int j = 0; j < radius && j < width; ++j) {
+			dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j);
+		}
+		// NOTE(review): presumably the vector helper fills columns radius .. radius + j_vec_end - 1, which is where the scalar loop below resumes - confirm against its definition.
+		convolution_f32_avx_s_1d_h_scanline(N, filter, filter_width, tmp + i * tmp_stride, dst + i * dst_stride, j_vec_end);
+
+		for (int j = j_vec_end + radius; j < width; ++j) {
+			dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j);
+		}
+	}
+}
+
+void convolution_f32_avx_sq_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride)
+{
+	// Entry point for the AVX filter of the squared image: forwards every
+	// argument to convolution_f32_avx_s_1d_sq and adds the selector N.
+	// Each recognised tap count (17, 9, 5, 3) has its own call with a literal
+	// N, so the selector is a compile-time constant at each call site.
+	// Any other filter_width is forwarded with N = 0.
+	// tmp is scratch storage handed straight through; its required size is
+	// decided by the callee, not here.
+	if (filter_width == 17)
+		convolution_f32_avx_s_1d_sq(17, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+	else if (filter_width == 9)
+		convolution_f32_avx_s_1d_sq(9, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+	else if (filter_width == 5)
+		convolution_f32_avx_s_1d_sq(5, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+	else if (filter_width == 3)
+		convolution_f32_avx_s_1d_sq(3, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+	else
+		convolution_f32_avx_s_1d_sq(0, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+}
+
+// Horizontal filter of the element-wise product src1*src2 for a single scanline: dst[j + radius] = sum over k of filter[k] * src1[j + k] * src2[j + k], j in [0, j_end) in steps of 8.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_h_xy_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+	// N selects a fully unrolled kernel for 5, 9 or 17 taps; any other value takes the generic loop in the final else branch.
+	if (N == 5)
+	{
+		convolution_f32_avx_s_1d_h_xy_scanline_5(filter, filter_width, src1, src2, dst, j_end);
+	}
+	else if (N == 9)
+	{
+		convolution_f32_avx_s_1d_h_xy_scanline_9(filter, filter_width, src1, src2, dst, j_end);
+	}
+	else if (N == 17)
+	{
+		convolution_f32_avx_s_1d_h_xy_scanline_17(filter, filter_width, src1, src2, dst, j_end);
+	}
+	else {
+		// Generic path: the taps are consumed in groups of up to nine (x = 0, 9, 18, ...), with one full sweep over the scanline per group.
+		int radius = filter_width / 2;
+
+		for (int x = 0; x < filter_width; x += 9) {
+			__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+			// Clear the tap registers. f4 is not cleared: it is only read under case 5 and above, where the switch below also assigns it.
+			f0 = _mm256_setzero_ps();
+			f1 = _mm256_setzero_ps();
+			f2 = _mm256_setzero_ps();
+			f3 = _mm256_setzero_ps();
+			f5 = _mm256_setzero_ps();
+			f6 = _mm256_setzero_ps();
+			f7 = _mm256_setzero_ps();
+			f8 = _mm256_setzero_ps();
+			// Intentional fall-through: entering at the number of taps left in this group loads exactly those coefficients.
+			switch (filter_width - x) {
+			default:
+				f8 = _mm256_broadcast_ss(filter + x + 8);
+			case 8:
+				f7 = _mm256_broadcast_ss(filter + x + 7);
+			case 7:
+				f6 = _mm256_broadcast_ss(filter + x + 6);
+			case 6:
+				f5 = _mm256_broadcast_ss(filter + x + 5);
+			case 5:
+				f4 = _mm256_broadcast_ss(filter + x + 4);
+			case 4:
+				f3 = _mm256_broadcast_ss(filter + x + 3);
+			case 3:
+				f2 = _mm256_broadcast_ss(filter + x + 2);
+			case 2:
+				f1 = _mm256_broadcast_ss(filter + x + 1);
+			case 1:
+				f0 = _mm256_broadcast_ss(filter + x + 0);
+			}
+
+			for (int j = 0; j < j_end; j += 8) {
+				__m256 accum = _mm256_setzero_ps();
+				__m256 sum0, sum1, sum2, sum3;
+				__m256 g, g2;
+
+				sum0 = _mm256_setzero_ps();
+				sum1 = _mm256_setzero_ps();
+				sum2 = _mm256_setzero_ps();
+				sum3 = _mm256_setzero_ps();
+				// Same fall-through pattern: only the taps present in this group are loaded (unaligned) and accumulated.
+				switch (filter_width - x) {
+				default:
+					g = _mm256_loadu_ps(src1 + j + x + 8);
+					g2 = _mm256_loadu_ps(src2 + j + x + 8);
+					g = _mm256_mul_ps(g, g2);
+					sum0 = _mm256_mul_ps(f8, g);
+				case 8:
+					g = _mm256_loadu_ps(src1 + j + x + 7);
+					g2 = _mm256_loadu_ps(src2 + j + x + 7);
+					g = _mm256_mul_ps(g, g2);
+					sum3 = _mm256_mul_ps(f7, g);
+				case 7:
+					g = _mm256_loadu_ps(src1 + j + x + 6);
+					g2 = _mm256_loadu_ps(src2 + j + x + 6);
+					g = _mm256_mul_ps(g, g2);
+					sum2 = _mm256_mul_ps(f6, g);
+				case 6:
+					g = _mm256_loadu_ps(src1 + j + x + 5);
+					g2 = _mm256_loadu_ps(src2 + j + x + 5);
+					g = _mm256_mul_ps(g, g2);
+					sum1 = _mm256_mul_ps(f5, g);
+				case 5:
+					g = _mm256_loadu_ps(src1 + j + x + 4);
+					g2 = _mm256_loadu_ps(src2 + j + x + 4);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f4, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				case 4:
+					g = _mm256_loadu_ps(src1 + j + x + 3);
+					g2 = _mm256_loadu_ps(src2 + j + x + 3);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f3, g);
+					sum3 = _mm256_add_ps(sum3, g);
+				case 3:
+					g = _mm256_loadu_ps(src1 + j + x + 2);
+					g2 = _mm256_loadu_ps(src2 + j + x + 2);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f2, g);
+					sum2 = _mm256_add_ps(sum2, g);
+				case 2:
+					g = _mm256_loadu_ps(src1 + j + x + 1);
+					g2 = _mm256_loadu_ps(src2 + j + x + 1);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f1, g);
+					sum1 = _mm256_add_ps(sum1, g);
+				case 1:
+					g = _mm256_loadu_ps(src1 + j + x + 0);
+					g2 = _mm256_loadu_ps(src2 + j + x + 0);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f0, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				}
+
+				sum0 = _mm256_add_ps(sum0, sum2);
+				sum1 = _mm256_add_ps(sum1, sum3);
+
+				sum0 = _mm256_add_ps(sum0, sum1);
+				accum = _mm256_add_ps(accum, sum0);
+				// Every group after the first adds onto what the earlier groups already stored in dst.
+				if (x)
+					accum = _mm256_add_ps(accum, _mm256_loadu_ps(dst + j + radius));
+
+				_mm256_storeu_ps(dst + j + radius, accum);
+			}
+		}
+		// NOTE(review): the _5/_9/_17 kernels called above are defined after this function; presumably they are declared earlier in the file - confirm.
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	// Horizontal 17-tap filter of src1*src2: dst[j + 8] = sum over k = 0..16 of filter[k] * src1[j + k] * src2[j + k], 8 outputs per iteration. filter_width is not read.
+	// First pass: filter taps 0-8; the partial sum is written to dst.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 accum = _mm256_setzero_ps();
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+		// The inputs are read with unaligned loads because consecutive taps are one float apart.
+		g = _mm256_loadu_ps(src1 + j + 0);
+		g2 = _mm256_loadu_ps(src2 + j + 0);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 1);
+		g2 = _mm256_loadu_ps(src2 + j + 1);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 2);
+		g2 = _mm256_loadu_ps(src2 + j + 2);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 3);
+		g2 = _mm256_loadu_ps(src2 + j + 3);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 4);
+		g2 = _mm256_loadu_ps(src2 + j + 4);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_loadu_ps(src1 + j + 5);
+		g2 = _mm256_loadu_ps(src2 + j + 5);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_loadu_ps(src1 + j + 6);
+		g2 = _mm256_loadu_ps(src2 + j + 6);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_loadu_ps(src1 + j + 7);
+		g2 = _mm256_loadu_ps(src2 + j + 7);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_loadu_ps(src1 + j + 8);
+		g2 = _mm256_loadu_ps(src2 + j + 8);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		accum = _mm256_add_ps(accum, sum0);
+		// Aligned store: dst + j + 8 is a whole number of 8-float vectors past dst, so this relies on dst itself being 32-byte aligned.
+		_mm256_store_ps(dst + j + 8, accum); // radius = 8
+	}
+
+	// Second pass: filter taps 9-16; f0..f7 are reused for these eight coefficients.
+	f0 = _mm256_broadcast_ss(filter + 9);
+	f1 = _mm256_broadcast_ss(filter + 10);
+	f2 = _mm256_broadcast_ss(filter + 11);
+	f3 = _mm256_broadcast_ss(filter + 12);
+	f4 = _mm256_broadcast_ss(filter + 13);
+	f5 = _mm256_broadcast_ss(filter + 14);
+	f6 = _mm256_broadcast_ss(filter + 15);
+	f7 = _mm256_broadcast_ss(filter + 16);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+
+		float *dst_ptr = dst + j + 8; // radius = 8
+
+		g = _mm256_loadu_ps(src1 + j + 9);
+		g2 = _mm256_loadu_ps(src2 + j + 9);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 10);
+		g2 = _mm256_loadu_ps(src2 + j + 10);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 11);
+		g2 = _mm256_loadu_ps(src2 + j + 11);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 12);
+		g2 = _mm256_loadu_ps(src2 + j + 12);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 13);
+		g2 = _mm256_loadu_ps(src2 + j + 13);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_loadu_ps(src1 + j + 14);
+		g2 = _mm256_loadu_ps(src2 + j + 14);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_loadu_ps(src1 + j + 15);
+		g2 = _mm256_loadu_ps(src2 + j + 15);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_loadu_ps(src1 + j + 16);
+		g2 = _mm256_loadu_ps(src2 + j + 16);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// Add the partial result that the first pass stored at the same address (aligned load and store again).
+		sum0 = _mm256_add_ps(_mm256_load_ps(dst_ptr), sum0);
+		_mm256_store_ps(dst_ptr, sum0);
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	// Horizontal 9-tap filter of src1*src2: dst[j + 4] = sum over k = 0..8 of filter[k] * src1[j + k] * src2[j + k], 8 outputs per iteration. filter_width is not read.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 accum = _mm256_setzero_ps();
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+		// The inputs are read with unaligned loads because consecutive taps are one float apart.
+		g = _mm256_loadu_ps(src1 + j + 0);
+		g2 = _mm256_loadu_ps(src2 + j + 0);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 1);
+		g2 = _mm256_loadu_ps(src2 + j + 1);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 2);
+		g2 = _mm256_loadu_ps(src2 + j + 2);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 3);
+		g2 = _mm256_loadu_ps(src2 + j + 3);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 4);
+		g2 = _mm256_loadu_ps(src2 + j + 4);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_loadu_ps(src1 + j + 5);
+		g2 = _mm256_loadu_ps(src2 + j + 5);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_loadu_ps(src1 + j + 6);
+		g2 = _mm256_loadu_ps(src2 + j + 6);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_loadu_ps(src1 + j + 7);
+		g2 = _mm256_loadu_ps(src2 + j + 7);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_loadu_ps(src1 + j + 8);
+		g2 = _mm256_loadu_ps(src2 + j + 8);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		accum = _mm256_add_ps(accum, sum0);
+		// Unaligned store: the destination is offset from dst + j by 4 floats, i.e. half a vector.
+		_mm256_storeu_ps(dst + j + 4, accum); // radius = 4
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4;
+	// Horizontal 5-tap filter of src1*src2: dst[j + 2] = sum over k = 0..4 of filter[k] * src1[j + k] * src2[j + k], 8 outputs per iteration. filter_width is not read.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 accum = _mm256_setzero_ps();
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+		// The inputs are read with unaligned loads because consecutive taps are one float apart.
+		g = _mm256_loadu_ps(src1 + j + 0);
+		g2 = _mm256_loadu_ps(src2 + j + 0);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 1);
+		g2 = _mm256_loadu_ps(src2 + j + 1);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 2);
+		g2 = _mm256_loadu_ps(src2 + j + 2);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 3);
+		g2 = _mm256_loadu_ps(src2 + j + 3);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_loadu_ps(src1 + j + 4);
+		g2 = _mm256_loadu_ps(src2 + j + 4);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		accum = _mm256_add_ps(accum, sum0);
+		// Unaligned store: the destination is offset from dst + j by 2 floats.
+		_mm256_storeu_ps(dst + j + 2, accum); // radius = 2
+	}
+}
+
+// Vertical filter of the element-wise product src1*src2 for a single output row: dst[j] = sum over k of filter[k] * src1[(k - radius) * src1_stride + j] * src2[(k - radius) * src2_stride + j], j in [0, j_end) in steps of 8.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_v_xy_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end)
+{
+	// N selects a fully unrolled kernel for 5, 9 or 17 taps; any other value takes the generic loop in the final else branch.
+	if (N == 5)
+	{
+		convolution_f32_avx_s_1d_v_xy_scanline_5(filter, filter_width, src1, src2, dst, src1_stride, src2_stride, j_end);
+	}
+	else if (N == 9)
+	{
+		convolution_f32_avx_s_1d_v_xy_scanline_9(filter, filter_width, src1, src2, dst, src1_stride, src2_stride, j_end);
+	}
+	else if (N == 17)
+	{
+		convolution_f32_avx_s_1d_v_xy_scanline_17(filter, filter_width, src1, src2, dst, src1_stride, src2_stride, j_end);
+	}
+	else {
+		// Generic path: step both inputs back by radius rows, then consume the taps in groups of up to nine (y = 0, 9, 18, ...).
+		int radius = filter_width / 2;
+		src1 -= radius * src1_stride;
+		src2 -= radius * src2_stride;
+
+		for (int y = 0; y < filter_width; y += 9) {
+			__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+			// Clear the tap registers. f4 is not cleared: it is only read under case 5 and above, where the switch below also assigns it.
+			f0 = _mm256_setzero_ps();
+			f1 = _mm256_setzero_ps();
+			f2 = _mm256_setzero_ps();
+			f3 = _mm256_setzero_ps();
+			f5 = _mm256_setzero_ps();
+			f6 = _mm256_setzero_ps();
+			f7 = _mm256_setzero_ps();
+			f8 = _mm256_setzero_ps();
+			// Intentional fall-through: entering at the number of taps left in this group loads exactly those coefficients.
+			switch (filter_width - y) {
+			default:
+				f8 = _mm256_broadcast_ss(filter + y + 8);
+			case 8:
+				f7 = _mm256_broadcast_ss(filter + y + 7);
+			case 7:
+				f6 = _mm256_broadcast_ss(filter + y + 6);
+			case 6:
+				f5 = _mm256_broadcast_ss(filter + y + 5);
+			case 5:
+				f4 = _mm256_broadcast_ss(filter + y + 4);
+			case 4:
+				f3 = _mm256_broadcast_ss(filter + y + 3);
+			case 3:
+				f2 = _mm256_broadcast_ss(filter + y + 2);
+			case 2:
+				f1 = _mm256_broadcast_ss(filter + y + 1);
+			case 1:
+				f0 = _mm256_broadcast_ss(filter + y + 0);
+			}
+
+			for (int j = 0; j < j_end; j += 8) {
+				__m256 accum = _mm256_setzero_ps();
+				__m256 sum0, sum1, sum2, sum3;
+				__m256 g, g2;
+
+				sum0 = _mm256_setzero_ps();
+				sum1 = _mm256_setzero_ps();
+				sum2 = _mm256_setzero_ps();
+				sum3 = _mm256_setzero_ps();
+				// Same fall-through pattern; the loads are the aligned form, so each row address at column j has to be 32-byte aligned.
+				switch (filter_width - y) {
+				default:
+					g = _mm256_load_ps(src1 + (y + 8) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 8) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					sum0 = _mm256_mul_ps(f8, g);
+				case 8:
+					g = _mm256_load_ps(src1 + (y + 7) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 7) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					sum3 = _mm256_mul_ps(f7, g);
+				case 7:
+					g = _mm256_load_ps(src1 + (y + 6) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 6) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					sum2 = _mm256_mul_ps(f6, g);
+				case 6:
+					g = _mm256_load_ps(src1 + (y + 5) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 5) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					sum1 = _mm256_mul_ps(f5, g);
+				case 5:
+					g = _mm256_load_ps(src1 + (y + 4) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 4) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f4, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				case 4:
+					g = _mm256_load_ps(src1 + (y + 3) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 3) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f3, g);
+					sum3 = _mm256_add_ps(sum3, g);
+				case 3:
+					g = _mm256_load_ps(src1 + (y + 2) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 2) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f2, g);
+					sum2 = _mm256_add_ps(sum2, g);
+				case 2:
+					g = _mm256_load_ps(src1 + (y + 1) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 1) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f1, g);
+					sum1 = _mm256_add_ps(sum1, g);
+				case 1:
+					g = _mm256_load_ps(src1 + (y + 0) * src1_stride + j);
+					g2 = _mm256_load_ps(src2 + (y + 0) * src2_stride + j);
+					g = _mm256_mul_ps(g, g2);
+					g = _mm256_mul_ps(f0, g);
+					sum0 = _mm256_add_ps(sum0, g);
+				}
+
+				sum0 = _mm256_add_ps(sum0, sum2);
+				sum1 = _mm256_add_ps(sum1, sum3);
+
+				sum0 = _mm256_add_ps(sum0, sum1);
+				accum = _mm256_add_ps(accum, sum0);
+				// Every group after the first adds onto what the earlier groups already stored in dst.
+				if (y)
+					accum = _mm256_add_ps(accum, _mm256_load_ps(dst + j));
+
+				_mm256_store_ps(dst + j, accum);
+			}
+		}
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	src1 -= 8 * src1_stride; // radius = 8
+	src2 -= 8 * src2_stride; // radius = 8
+	// Vertical 17-tap filter of src1*src2 for one output row, 8 columns per iteration:
+	// dst[j] = sum over k = 0..16 of filter[k] * src1[k * src1_stride + j] * src2[k * src2_stride + j] (pointers as adjusted above). filter_width is not read. Taps 0-8 first.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+		// _mm256_load_ps / _mm256_store_ps are the aligned forms: every row address at column j and dst + j have to be 32-byte aligned.
+		g = _mm256_load_ps(src1 + 0 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 0 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src1 + 1 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 1 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src1 + 2 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 2 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src1 + 3 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 3 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src1 + 4 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 4 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_load_ps(src1 + 5 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 5 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_load_ps(src1 + 6 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 6 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_load_ps(src1 + 7 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 7 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_load_ps(src1 + 8 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 8 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// This pass overwrites dst; the second loop below adds the remaining taps to it.
+		_mm256_store_ps(dst + j, sum0);
+	}
+
+	// Second pass: filter taps 9-16 (rows 9-16 of the adjusted inputs); f0..f7 are reused for these eight coefficients.
+	f0 = _mm256_broadcast_ss(filter + 9);
+	f1 = _mm256_broadcast_ss(filter + 10);
+	f2 = _mm256_broadcast_ss(filter + 11);
+	f3 = _mm256_broadcast_ss(filter + 12);
+	f4 = _mm256_broadcast_ss(filter + 13);
+	f5 = _mm256_broadcast_ss(filter + 14);
+	f6 = _mm256_broadcast_ss(filter + 15);
+	f7 = _mm256_broadcast_ss(filter + 16);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+
+		g = _mm256_load_ps(src1 + 9 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 9 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src1 + 10 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 10 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src1 + 11 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 11 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src1 + 12 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 12 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src1 + 13 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 13 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_load_ps(src1 + 14 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 14 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_load_ps(src1 + 15 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 15 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_load_ps(src1 + 16 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 16 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// Add the partial result that the first pass left in dst.
+		sum0 = _mm256_add_ps(_mm256_load_ps(dst + j), sum0);
+		_mm256_store_ps(dst + j, sum0);
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+	src1 -= 4 * src1_stride; // radius = 4
+	src2 -= 4 * src2_stride; // radius = 4
+	// Vertical 9-tap filter of src1*src2 for one output row, 8 columns per iteration:
+	// dst[j] = sum over k = 0..8 of filter[k] * src1[k * src1_stride + j] * src2[k * src2_stride + j] (pointers as adjusted above). filter_width is not read.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	f5 = _mm256_broadcast_ss(filter + 5);
+	f6 = _mm256_broadcast_ss(filter + 6);
+	f7 = _mm256_broadcast_ss(filter + 7);
+	f8 = _mm256_broadcast_ss(filter + 8);
+
+	for (int j = 0; j < j_end; j += 8) {
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+		// _mm256_load_ps / _mm256_store_ps are the aligned forms: every row address at column j and dst + j have to be 32-byte aligned.
+		g = _mm256_load_ps(src1 + 0 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 0 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src1 + 1 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 1 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src1 + 2 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 2 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src1 + 3 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 3 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+
+		g = _mm256_load_ps(src1 + 4 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 4 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		g = _mm256_load_ps(src1 + 5 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 5 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f5, g);
+		sum1 = _mm256_add_ps(sum1, g);
+
+		g = _mm256_load_ps(src1 + 6 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 6 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f6, g);
+		sum2 = _mm256_add_ps(sum2, g);
+
+		g = _mm256_load_ps(src1 + 7 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 7 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f7, g);
+		sum3 = _mm256_add_ps(sum3, g);
+
+		g = _mm256_load_ps(src1 + 8 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 8 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f8, g);
+		sum0 = _mm256_add_ps(sum0, g);
+
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// All nine taps are covered in a single pass, so dst is simply overwritten.
+		_mm256_store_ps(dst + j, sum0);
+	}
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end)
+{
+	__m256 f0, f1, f2, f3, f4; // one broadcast register per filter tap
+	src1 -= 2 * src1_stride; // radius = 2: rebase so that row offset k below corresponds to tap k
+	src2 -= 2 * src2_stride; // radius = 2: same rebasing for the second source
+	// Vertical pass over one scanline: dst[j] = sum_k filter[k] * src1[row k][j] * src2[row k][j].
+	// Evaluate filter taps 0-4: broadcast each coefficient into all eight lanes.
+	f0 = _mm256_broadcast_ss(filter + 0);
+	f1 = _mm256_broadcast_ss(filter + 1);
+	f2 = _mm256_broadcast_ss(filter + 2);
+	f3 = _mm256_broadcast_ss(filter + 3);
+	f4 = _mm256_broadcast_ss(filter + 4);
+	// filter_width is not read in this function; the tap count is fixed at 5.
+	for (int j = 0; j < j_end; j += 8) { // eight columns per iteration; aligned loads/stores need 32-byte aligned addresses
+		__m256 sum0, sum1, sum2, sum3;
+		__m256 g, g2;
+		// Taps 0-3 initialise four independent partial sums.
+		g = _mm256_load_ps(src1 + 0 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 0 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f0, g);
+		sum0 = g;
+
+		g = _mm256_load_ps(src1 + 1 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 1 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f1, g);
+		sum1 = g;
+
+		g = _mm256_load_ps(src1 + 2 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 2 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f2, g);
+		sum2 = g;
+
+		g = _mm256_load_ps(src1 + 3 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 3 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f3, g);
+		sum3 = g;
+		// The fifth tap is folded into sum0.
+		g = _mm256_load_ps(src1 + 4 * src1_stride + j);
+		g2 = _mm256_load_ps(src2 + 4 * src2_stride + j);
+		g = _mm256_mul_ps(g, g2);
+		g = _mm256_mul_ps(f4, g);
+		sum0 = _mm256_add_ps(sum0, g);
+		// Reduce the four partial sums into sum0.
+		sum0 = _mm256_add_ps(sum0, sum2);
+		sum1 = _mm256_add_ps(sum1, sum3);
+
+		sum0 = _mm256_add_ps(sum0, sum1);
+		// The result overwrites dst; nothing previously stored there is read.
+		_mm256_store_ps(dst + j, sum0);
+	}
+}
+
+void convolution_f32_avx_s_1d_xy(
+	int N, // tap-count selector, forwarded unchanged to both scanline helpers
+	const float * RESTRICT filter, // filter_width coefficients, used by both passes
+	int filter_width,
+	const float * RESTRICT src1,
+	const float * RESTRICT src2,
+	float * RESTRICT dst,
+	float * RESTRICT tmp, // receives the vertical-pass result, rows tmp_stride floats apart
+	int width,
+	int height,
+	int src1_stride, // strides are applied as float-element offsets, not bytes
+	int src2_stride,
+	int dst_stride)
+{
+	int radius = filter_width / 2;
+	int width_mod8 = vmaf_floorn(width, 8); // presumably width rounded down to a multiple of 8 - confirm in helper
+	int tmp_stride = vmaf_ceiln(width, 8); // presumably width rounded up to a multiple of 8 - confirm in helper
+	// Limits of the region handed to the vectorised helpers; everything outside goes through the scalar edge routines.
+	int i_vec_end = height - radius;
+	int j_vec_end = width_mod8 - vmaf_ceiln(radius + 1, 8);
+	// Two-pass filter: the vertical pass filters the product src1 * src2 into tmp, the horizontal pass filters tmp into dst.
+	// Vertical pass.
+	for (int i = 0; i < radius; ++i) { // top rows: taps reach above row 0, so every column uses the mirroring scalar path
+		for (int j = 0; j < width; ++j) {
+			tmp[i * tmp_stride + j] = convolution_edge_xy_s(false, filter, filter_width, src1, src2, width, height, src1_stride, src2_stride, i, j);
+		}
+	}
+	for (int i = radius; i < i_vec_end; ++i) { // interior rows: AVX helper for the first width_mod8 columns
+		convolution_f32_avx_s_1d_v_xy_scanline(N, filter, filter_width, src1 + i * src1_stride, src2 + i * src2_stride, tmp + i * tmp_stride, src1_stride, src2_stride, width_mod8);
+		// Columns from width_mod8 up to width are finished with the scalar routine.
+		for (int j = width_mod8; j < width; ++j) {
+			tmp[i * tmp_stride + j] = convolution_edge_xy_s(false, filter, filter_width, src1, src2, width, height, src1_stride, src2_stride, i, j);
+		}
+	}
+	for (int i = i_vec_end; i < height; ++i) { // bottom rows: taps reach below the last row
+		for (int j = 0; j < width; ++j) {
+			tmp[i * tmp_stride + j] = convolution_edge_xy_s(false, filter, filter_width, src1, src2, width, height, src1_stride, src2_stride, i, j);
+		}
+	}
+
+	// Horizontal pass.
+	for (int i = 0; i < height; ++i) {
+		for (int j = 0; j < radius; ++j) { // left border columns via the scalar edge routine
+			dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j);
+		}
+		// NOTE(review): presumably fills the dst columns between the two scalar border loops - confirm in the helper.
+		convolution_f32_avx_s_1d_h_scanline(N, filter, filter_width, tmp + i * tmp_stride, dst + i * dst_stride, j_vec_end);
+
+		for (int j = j_vec_end + radius; j < width; ++j) { // remaining right-hand columns via the scalar edge routine
+			dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j);
+		}
+	}
+}
+
+void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float *src1, const float *src2, float *dst, float *tmp, int width, int height, int src1_stride, int src2_stride, int dst_stride)
+{
+	switch (filter_width) { // dispatch on width so each known size passes a literal N; NOTE(review): presumably lets the inlined helpers specialise - confirm
+	case 17:
+		convolution_f32_avx_s_1d_xy(17, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride);
+		break;
+	case 9:
+		convolution_f32_avx_s_1d_xy(9, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride);
+		break;
+	case 5:
+		convolution_f32_avx_s_1d_xy(5, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride);
+		break;
+	case 3:
+		convolution_f32_avx_s_1d_xy(3, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride);
+		break;
+	default: // any other width is forwarded with N = 0
+		convolution_f32_avx_s_1d_xy(0, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride);
+		break;
+	}
+}
+#endif
diff --git a/feature/src/common/convolution_internal.h b/feature/src/common/convolution_internal.h
index d41a2f9ff..1bf5a3863 100644
--- a/feature/src/common/convolution_internal.h
+++ b/feature/src/common/convolution_internal.h
@@ -51,4 +51,64 @@ FORCE_INLINE inline float convolution_edge_s(bool horizontal, const float *filte
 	return accum;
 }
 
+#if VIF_OPT_ENABLE 
+FORCE_INLINE inline float convolution_edge_sq_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j)
+{
+	int radius = filter_width / 2;
+	// Returns sum_k filter[k] * src[tap k]^2 for output position (i, j) along one axis, mirroring taps that fall outside the image.
+	float accum = 0;
+	float src_val;
+	for (int k = 0; k < filter_width; ++k) {
+		int i_tap = horizontal ? i : i - radius + k; // only the filtered axis moves with k
+		int j_tap = horizontal ? j - radius + k : j;
+
+		// Handle edges by mirroring.
+		if (horizontal) {
+			if (j_tap < 0)
+				j_tap = -j_tap; // reflect about column 0 (-1 maps to 1)
+			else if (j_tap >= width)
+				j_tap = width - (j_tap - width + 1); // equals 2 * width - 1 - j_tap (width maps to width - 1)
+		}
+		else {
+			if (i_tap < 0)
+				i_tap = -i_tap; // reflect about row 0 (-1 maps to 1)
+			else if (i_tap >= height)
+				i_tap = height - (i_tap - height + 1); // equals 2 * height - 1 - i_tap (height maps to height - 1)
+		}
+		src_val = src[i_tap * stride + j_tap]; // stride is applied as a float-element row pitch
+		accum += filter[k] * (src_val * src_val);
+	}
+	return accum;
+}
+
+FORCE_INLINE inline float convolution_edge_xy_s(bool horizontal, const float *filter, int filter_width, const float *src1, const float *src2, int width, int height, int stride1, int stride2, int i, int j)
+{
+	int radius = filter_width / 2;
+	// Returns sum_k filter[k] * src1[tap k] * src2[tap k] for output position (i, j) along one axis, mirroring taps outside the image.
+	float accum = 0;
+	float src_val1, src_val2;
+	for (int k = 0; k < filter_width; ++k) {
+		int i_tap = horizontal ? i : i - radius + k; // only the filtered axis moves with k
+		int j_tap = horizontal ? j - radius + k : j;
+
+		// Handle edges by mirroring.
+		if (horizontal) {
+			if (j_tap < 0)
+				j_tap = -j_tap; // reflect about column 0 (-1 maps to 1)
+			else if (j_tap >= width)
+				j_tap = width - (j_tap - width + 1); // equals 2 * width - 1 - j_tap (width maps to width - 1)
+		}
+		else {
+			if (i_tap < 0)
+				i_tap = -i_tap; // reflect about row 0 (-1 maps to 1)
+			else if (i_tap >= height)
+				i_tap = height - (i_tap - height + 1); // equals 2 * height - 1 - i_tap (height maps to height - 1)
+		}
+		src_val1 = src1[i_tap * stride1 + j_tap]; // each source is indexed with its own float-element row pitch
+		src_val2 = src2[i_tap * stride2 + j_tap];
+		accum += filter[k] * (src_val1 * src_val2);
+	}
+	return accum;
+}
+#endif
 #endif // CONVOLUTION_INTERNAL_H_
diff --git a/feature/src/vif.c b/feature/src/vif.c
index 165677e75..b3f13169b 100644
--- a/feature/src/vif.c
+++ b/feature/src/vif.c
@@ -26,6 +26,8 @@
 #include "common/alloc.h"
 #include "common/file_io.h"
 #include "vif_options.h"
+#include "convolution.h"
+#include "convolution_internal.h"
 #include "vif_tools.h"
 
 #define read_image_b       read_image_b2s
@@ -40,6 +42,10 @@
 #define vif_statistic      vif_statistic_s
 #define offset_image       offset_image_s
 
+#if VIF_OPT_ENABLE
+#define vif_filter1d_sq    vif_filter1d_sq_s
+#define vif_filter1d_xy    vif_filter1d_xy_s
+#endif
 int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores)
 {
     float *data_buf = 0;
@@ -53,15 +59,22 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
 
     float *mu1;
     float *mu2;
-    float *mu1_sq;
-    float *mu2_sq;
-    float *mu1_mu2;
     float *ref_sq_filt;
     float *dis_sq_filt;
     float *ref_dis_filt;
+    float *tmpbuf;
+
+
+#if VIF_OPT_ENABLE
     float *num_array;
     float *den_array;
-    float *tmpbuf;
+#else
+	float *mu1_sq;
+	float *mu2_sq;
+	float *mu1_mu2;
+    float *num_array;
+    float *den_array;
+#endif
 
     /* Offset pointers to adjust for convolution border handling. */
     float *mu1_adj = 0;
@@ -88,11 +101,42 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
     int buf_stride = ALIGN_CEIL(w * sizeof(float));
     size_t buf_sz_one = (size_t)buf_stride * h;
 
-    double num = 0;
-    double den = 0;
+    float num = 0;
+    float den = 0;
 
     int scale;
     int ret = 1;
+#if VIF_OPT_ENABLE
+	// Code optimized to save on multiple buffer copies 
+	// hence the reduction in the number of buffers required from 15 to 10 
+#define VIF_BUF_CNT 10	
+	if (SIZE_MAX / buf_sz_one < VIF_BUF_CNT)
+	{
+		printf("error: SIZE_MAX / buf_sz_one < VIF_BUF_CNT, buf_sz_one = %zu.\n", buf_sz_one);
+		fflush(stdout);
+		goto fail_or_end;
+	}
+
+	if (!(data_buf = aligned_malloc(buf_sz_one * VIF_BUF_CNT, MAX_ALIGN)))
+	{
+		printf("error: aligned_malloc failed for data_buf.\n");
+		fflush(stdout);
+		goto fail_or_end;
+	}
+
+	data_top = (char *)data_buf;
+
+	ref_scale = (float *)data_top; data_top += buf_sz_one;
+	dis_scale = (float *)data_top; data_top += buf_sz_one;
+	mu1 = (float *)data_top; data_top += buf_sz_one;
+	mu2 = (float *)data_top; data_top += buf_sz_one;
+	ref_sq_filt = (float *)data_top; data_top += buf_sz_one;
+	dis_sq_filt = (float *)data_top; data_top += buf_sz_one;
+	ref_dis_filt = (float *)data_top; data_top += buf_sz_one;
+	num_array    = (float *)data_top; data_top += buf_sz_one;
+    den_array    = (float *)data_top; data_top += buf_sz_one;
+	tmpbuf = (float *)data_top; data_top += buf_sz_one;
+#else
 
     if (SIZE_MAX / buf_sz_one < 15)
     {
@@ -126,6 +170,7 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
     num_array    = (float *)data_top; data_top += buf_sz_one;
     den_array    = (float *)data_top; data_top += buf_sz_one;
     tmpbuf    = (float *)data_top; data_top += buf_sz_one;
+#endif
 
     for (scale = 0; scale < 4; ++scale)
     {
@@ -192,36 +237,49 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
         vif_filter2d(filter, curr_ref_scale, mu1, w, h, curr_ref_stride, buf_stride, filter_width);
         vif_filter2d(filter, curr_dis_scale, mu2, w, h, curr_dis_stride, buf_stride, filter_width);
 #endif
+#if !VIF_OPT_ENABLE
         vif_xx_yy_xy(mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride);
 
         vif_xx_yy_xy(curr_ref_scale, curr_dis_scale, ref_sq, dis_sq, ref_dis, w, h, curr_ref_stride, curr_dis_stride, buf_stride, buf_stride, buf_stride);
+#endif
 #ifdef VIF_OPT_FILTER_1D
+#if VIF_OPT_ENABLE
+
+		// Code optimized by adding intrinsic code for the functions
+		// vif_filter1d_sq and vif_filter1d_xy
+		vif_filter1d_sq(filter, curr_ref_scale, ref_sq_filt, tmpbuf, w, h, curr_ref_stride, buf_stride, filter_width);
+		vif_filter1d_sq(filter, curr_dis_scale, dis_sq_filt, tmpbuf, w, h, curr_dis_stride, buf_stride, filter_width);
+		vif_filter1d_xy(filter, curr_ref_scale, curr_dis_scale, ref_dis_filt, tmpbuf, w, h, curr_ref_stride, curr_dis_stride, buf_stride, filter_width);
+#else
         vif_filter1d(filter, ref_sq, ref_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width);
         vif_filter1d(filter, dis_sq, dis_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width);
         vif_filter1d(filter, ref_dis, ref_dis_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width);
+#endif
 #else
         vif_filter2d(filter, ref_sq, ref_sq_filt, w, h, buf_stride, buf_stride, filter_width);
         vif_filter2d(filter, dis_sq, dis_sq_filt, w, h, buf_stride, buf_stride, filter_width);
         vif_filter2d(filter, ref_dis, ref_dis_filt, w, h, buf_stride, buf_stride, filter_width);
 #endif
+#if VIF_OPT_ENABLE
+		vif_statistic(mu1, mu2, NULL, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array,
+			w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride);
+#else
         vif_statistic(mu1_sq, mu2_sq, mu1_mu2, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array,
                       w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride);
-
+#endif
         mu1_adj = ADJUST(mu1);
         mu2_adj = ADJUST(mu2);
 
 #ifdef VIF_OPT_DEBUG_DUMP
-        mu1_sq_adj  = ADJUST(mu1_sq);
-        mu2_sq_adj  = ADJUST(mu2_sq);
-        mu1_mu2_adj = ADJUST(mu1_mu2);
-
         ref_sq_filt_adj  = ADJUST(ref_sq_filt);
         dis_sq_filt_adj  = ADJUST(dis_sq_filt);
         ref_dis_filt_adj = ADJUST(ref_dis_filt);
 #endif
 
+#if !VIF_OPT_ENABLE
         num_array_adj = ADJUST(num_array);
         den_array_adj = ADJUST(den_array);
+#endif
 #undef ADJUST
 
 #ifdef VIF_OPT_DEBUG_DUMP
@@ -237,15 +295,6 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
         sprintf(pathbuf, "stage/mu2[%d].bin", scale);
         write_image(pathbuf, mu2_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
 
-        sprintf(pathbuf, "stage/mu1_sq[%d].bin", scale);
-        write_image(pathbuf, mu1_sq_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/mu2_sq[%d].bin", scale);
-        write_image(pathbuf, mu2_sq_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/mu1_mu2[%d].bin", scale);
-        write_image(pathbuf, mu1_mu2_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
-
         sprintf(pathbuf, "stage/ref_sq_filt[%d].bin", scale);
         write_image(pathbuf, ref_sq_filt_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
 
@@ -262,8 +311,13 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
         write_image(pathbuf, den_array_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
 #endif
 
+#if VIF_OPT_ENABLE
+		num = *num_array;
+		den = *den_array;
+#else
         num = vif_sum(num_array_adj, buf_valid_w, buf_valid_h, buf_stride);
         den = vif_sum(den_array_adj, buf_valid_w, buf_valid_h, buf_stride);
+#endif
 
         scores[2*scale] = num;
         scores[2*scale+1] = den;
diff --git a/feature/src/vif_options.h b/feature/src/vif_options.h
index ef1f93b2e..61fe2ae49 100644
--- a/feature/src/vif_options.h
+++ b/feature/src/vif_options.h
@@ -36,4 +36,10 @@
 /* Whether to use a 1-D formulation of the Gaussian filter. */
 #define VIF_OPT_FILTER_1D
 
+/* VIF optimizations are enabled only for the 1-D filter */
+#ifdef VIF_OPT_FILTER_1D
+#define VIF_OPT_ENABLE 1
+#else
+#define VIF_OPT_ENABLE 0
+#endif
 #endif /* VIF_OPTIONS_H_ */
diff --git a/feature/src/vif_tools.c b/feature/src/vif_tools.c
index 8fae514e2..8f753fec5 100644
--- a/feature/src/vif_tools.c
+++ b/feature/src/vif_tools.c
@@ -23,6 +23,8 @@
 #include <string.h>
 #include "common/alloc.h"
 #include "vif_options.h"
+#include "convolution.h"
+#include "convolution_internal.h"
 #include "vif_tools.h"
 #include "common/cpu.h"
 
@@ -211,6 +213,76 @@ void vif_xx_yy_xy_s(const float *x, const float *y, float *xx, float *yy, float
     }
 }
 
+#if VIF_OPT_ENABLE
+void vif_statistic_s(const float *mu1, const float *mu2, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den,
+	int w, int h, int mu1_stride, int mu2_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride)
+{
+	static const float sigma_nsq = 2;
+	static const float sigma_max_inv = 4.0 / (255.0*255.0);
+	// Sums the per-pixel numerator and denominator terms over the w x h image and writes only the two totals to num[0] and den[0].
+	int mu1_px_stride = mu1_stride / sizeof(float); // incoming strides are divided by sizeof(float) to get element pitches
+	int mu2_px_stride = mu2_stride / sizeof(float);
+	int mu1_mu2_px_stride = mu1_mu2_stride / sizeof(float); // not used below: mu1_mu2 is never dereferenced
+	int xx_filt_px_stride = xx_filt_stride / sizeof(float);
+	int yy_filt_px_stride = yy_filt_stride / sizeof(float);
+	int xy_filt_px_stride = xy_filt_stride / sizeof(float);
+	int num_px_stride = num_stride / sizeof(float); // not used below: only num[0] is written
+	int den_px_stride = den_stride / sizeof(float); // not used below: only den[0] is written
+
+	float mu1_sq_val, mu2_sq_val, mu1_mu2_val, xx_filt_val, yy_filt_val, xy_filt_val;
+	float sigma1_sq, sigma2_sq, sigma12, g, sv_sq;
+	float num_val, den_val;
+	int i, j;
+
+	float accum_num = 0.0;
+	float accum_den = 0.0;
+	// Each row is summed into its own float accumulator first, then folded into the image totals.
+	for (i = 0; i < h; ++i) {
+		float accum_inner_num = 0;
+		float accum_inner_den = 0;
+		for (j = 0; j < w; ++j) {
+			float mu1_val = mu1[i * mu1_px_stride + j];
+			float mu2_val = mu2[i * mu2_px_stride + j];
+			mu1_sq_val = mu1_val * mu1_val; // same name as the Matlab code vifp_mscale.m
+			mu2_sq_val = mu2_val * mu2_val;
+			mu1_mu2_val = mu1_val * mu2_val; // computed here instead of reading mu1_mu2[i * mu1_mu2_px_stride + j]
+			xx_filt_val = xx_filt[i * xx_filt_px_stride + j];
+			yy_filt_val = yy_filt[i * yy_filt_px_stride + j];
+			xy_filt_val = xy_filt[i * xy_filt_px_stride + j];
+			// Subtract the mu products from the filtered xx / yy / xy inputs.
+			sigma1_sq = xx_filt_val - mu1_sq_val;
+			sigma2_sq = yy_filt_val - mu2_sq_val;
+			sigma12 = xy_filt_val - mu1_mu2_val;
+			// Below the sigma_nsq threshold the log terms are skipped and the denominator term is fixed at 1.
+			if (sigma1_sq < sigma_nsq) {
+				num_val = 1.0 - sigma2_sq * sigma_max_inv;
+				den_val = 1.0;
+			}
+			else {
+				sv_sq = (sigma2_sq + sigma_nsq) * sigma1_sq;
+				if (sigma12 < 0)
+				{
+					num_val = 0.0; // a negative sigma12 contributes nothing to the numerator
+				}
+				else
+				{
+					g = sv_sq - sigma12 * sigma12;
+					num_val = log2f(sv_sq / g);
+				}
+				den_val = log2f(1.0f + sigma1_sq / sigma_nsq);
+			}
+
+			accum_inner_num += num_val;
+			accum_inner_den += den_val;
+		}
+
+		accum_num += accum_inner_num;
+		accum_den += accum_inner_den;
+	}
+	num[0] = accum_num;
+	den[0] = accum_den;
+}
+#else
 void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den,
                      int w, int h, int mu1_sq_stride, int mu2_sq_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride)
 {
@@ -267,6 +339,7 @@ void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_
         }
     }
 }
+#endif
 
 void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth)
 {
@@ -329,6 +402,136 @@ void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf,
 
     aligned_free(tmp);
 }
+#if	VIF_OPT_ENABLE
+// Code optimized by adding intrinsic code for the functions
+// vif_filter1d_sq and vif_filter1d_xy
+
+void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth)
+{
+	/* Two-pass filter of src * src: samples are squared inside the vertical pass, then each row is filtered horizontally. */
+	int src_px_stride = src_stride / sizeof(float); /* incoming strides are divided by sizeof(float) to get element pitches */
+	int dst_px_stride = dst_stride / sizeof(float);
+
+	/* if support avx */
+	/* The AVX routine receives the caller's tmpbuf and the element pitches; this function then returns early. */
+	if (cpu >= VMAF_CPU_AVX)
+	{
+		convolution_f32_avx_sq_s(f, fwidth, src, dst, tmpbuf, w, h, src_px_stride, dst_px_stride);
+		return;
+	}
+
+	/* fall back */
+	/* The scalar path does not use tmpbuf; it allocates one row of its own. NOTE(review): the allocation result is not checked. */
+	float *tmp = aligned_malloc(ALIGN_CEIL(w * sizeof(float)), MAX_ALIGN);
+	float fcoeff, imgcoeff;
+
+	int i, j, fi, fj, ii, jj;
+
+	for (i = 0; i < h; ++i) {
+		/* Vertical pass. */
+		for (j = 0; j < w; ++j) {
+			float accum = 0;
+
+			for (fi = 0; fi < fwidth; ++fi) {
+				fcoeff = f[fi];
+
+				ii = i - fwidth / 2 + fi;
+				ii = ii < 0 ? -ii : (ii >= h ? 2 * h - ii - 1 : ii); /* mirror rows that fall outside [0, h) */
+
+				imgcoeff = src[ii * src_px_stride + j];
+
+				accum += fcoeff * (imgcoeff * imgcoeff);
+			}
+
+			tmp[j] = accum;
+		}
+
+		/* Horizontal pass. */
+		for (j = 0; j < w; ++j) {
+			float accum = 0;
+
+			for (fj = 0; fj < fwidth; ++fj) {
+				fcoeff = f[fj];
+
+				jj = j - fwidth / 2 + fj;
+				jj = jj < 0 ? -jj : (jj >= w ? 2 * w - jj - 1 : jj); /* mirror columns that fall outside [0, w) */
+
+				imgcoeff = tmp[jj];
+
+				accum += fcoeff * imgcoeff;
+			}
+
+			dst[i * dst_px_stride + j] = accum;
+		}
+	}
+
+	aligned_free(tmp);
+}
+
+void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, float *dst, float *tmpbuf, int w, int h, int src1_stride, int src2_stride, int dst_stride, int fwidth)
+{
+	/* Two-pass filter of src1 * src2: samples are multiplied inside the vertical pass, then each row is filtered horizontally. */
+	int src1_px_stride = src1_stride / sizeof(float); /* incoming strides are divided by sizeof(float) to get element pitches */
+	int src2_px_stride = src2_stride / sizeof(float); /* src2 must use its own stride, which may differ from src1's */
+	int dst_px_stride = dst_stride / sizeof(float);
+
+	/* if support avx */
+	/* The AVX routine receives the caller's tmpbuf and the element pitches; this function then returns early. */
+	if (cpu >= VMAF_CPU_AVX)
+	{
+		convolution_f32_avx_xy_s(f, fwidth, src1, src2, dst, tmpbuf, w, h, src1_px_stride, src2_px_stride, dst_px_stride);
+		return;
+	}
+
+	/* fall back */
+	/* The scalar path does not use tmpbuf; it allocates one row of its own. NOTE(review): the allocation result is not checked. */
+	float *tmp = aligned_malloc(ALIGN_CEIL(w * sizeof(float)), MAX_ALIGN);
+	float fcoeff, imgcoeff, imgcoeff1, imgcoeff2;
+
+	int i, j, fi, fj, ii, jj;
+
+	for (i = 0; i < h; ++i) {
+		/* Vertical pass. */
+		for (j = 0; j < w; ++j) {
+			float accum = 0;
+
+			for (fi = 0; fi < fwidth; ++fi) {
+				fcoeff = f[fi];
+
+				ii = i - fwidth / 2 + fi;
+				ii = ii < 0 ? -ii : (ii >= h ? 2 * h - ii - 1 : ii); /* mirror rows that fall outside [0, h) */
+
+				imgcoeff1 = src1[ii * src1_px_stride + j];
+				imgcoeff2 = src2[ii * src2_px_stride + j];
+
+				accum += fcoeff * (imgcoeff1 * imgcoeff2);
+			}
+
+			tmp[j] = accum;
+		}
+
+		/* Horizontal pass. */
+		for (j = 0; j < w; ++j) {
+			float accum = 0;
+
+			for (fj = 0; fj < fwidth; ++fj) {
+				fcoeff = f[fj];
+
+				jj = j - fwidth / 2 + fj;
+				jj = jj < 0 ? -jj : (jj >= w ? 2 * w - jj - 1 : jj); /* mirror columns that fall outside [0, w) */
+
+				imgcoeff = tmp[jj];
+
+				accum += fcoeff * imgcoeff;
+			}
+
+			dst[i * dst_px_stride + j] = accum;
+		}
+	}
+
+	aligned_free(tmp);
+}
+#endif
 
 void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth)
 {
diff --git a/feature/src/vif_tools.h b/feature/src/vif_tools.h
index 60c56bc15..d2a1bd97d 100644
--- a/feature/src/vif_tools.h
+++ b/feature/src/vif_tools.h
@@ -43,6 +43,12 @@ void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_
 
 void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth);
 
+#if VIF_OPT_ENABLE
+void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth);
+
+void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, float *dst, float *tmpbuf, int w, int h, int src1_stride, int src2_stride, int dst_stride, int fwidth);
+#endif
+
 void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth);
 
 #endif /* VIF_TOOLS_H_ */
diff --git a/wrapper/Makefile b/wrapper/Makefile
index 7f7e81d27..931751584 100644
--- a/wrapper/Makefile
+++ b/wrapper/Makefile
@@ -9,6 +9,8 @@ OBJDIR = $(TOP)/obj
 FEATURESRCDIR = $(TOP)/../feature/src
 PTOOLSDIR = $(TOP)/../ptools
 INSTALL_PREFIX = /usr/local
+INCLUDES += -I$(TOP)/../feature/src
+INCLUDES += -I$(TOP)/../feature/src/common
 
 OBJS = \
 	$(OBJDIR)/alloc.o \
@@ -72,13 +74,13 @@ $(OBJDIR)/frame.o: $(FEATURESRCDIR)/common/frame.c
 	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
 
 $(OBJDIR)/convolution.o: $(FEATURESRCDIR)/common/convolution.c
-	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
+	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $<
 
 $(OBJDIR)/cpu.o: $(FEATURESRCDIR)/common/cpu.c
 	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
 
 $(OBJDIR)/convolution_avx.o: $(FEATURESRCDIR)/common/convolution_avx.c
-	$(CC) -c -o $@ $(EXTRA_CFLAGS) $(CFLAGS) $(CPPFLAGS) $<
+	$(CC) -c -o $@ $(EXTRA_CFLAGS) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $<
 
 $(OBJDIR)/psnr_tools.o: $(FEATURESRCDIR)/psnr_tools.c
 	$(CC) -c -o $@ $(EXTRA_CFLAGS) $(CFLAGS) $(CPPFLAGS) $<
@@ -96,10 +98,10 @@ $(OBJDIR)/ansnr_tools.o: $(FEATURESRCDIR)/ansnr_tools.c
 	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
 
 $(OBJDIR)/vif.o: $(FEATURESRCDIR)/vif.c
-	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
+	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $<
 
 $(OBJDIR)/vif_tools.o: $(FEATURESRCDIR)/vif_tools.c
-	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
+	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $<
 
 $(OBJDIR)/motion.o: $(FEATURESRCDIR)/motion.c
 	$(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $<
diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c
index 1bbe66890..c96c451b9 100644
--- a/wrapper/src/combo.c
+++ b/wrapper/src/combo.c
@@ -99,6 +99,10 @@ void* combo_threadfunc(void* vmaf_thread_data)
     int ret = 0;
     bool next_frame_read;
 
+#if BUF_OPT_ENABLE		
+    bool offset_flag;
+#endif
+
 #ifdef MULTI_THREADING
     float *prev_blur_buf_ = 0;
     float *ref_buf_ = 0;
@@ -106,6 +110,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
     float *blur_buf_ = 0;
 #endif
 
+#if !BUF_OPT_ENABLE		
     if (!(ref_buf = aligned_malloc(data_sz, MAX_ALIGN)))
     {
         sprintf(errmsg, "aligned_malloc failed for ref_buf.\n");
@@ -143,6 +148,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
         sprintf(errmsg, "aligned_malloc failed for next_blur_buf.\n");
         goto fail_or_end;
     }
+#endif
 
     // use temp_buf for convolution_f32_c, and fread u and v
     if (!(temp_buf = aligned_malloc(data_sz * 2, MAX_ALIGN)))
@@ -173,6 +179,23 @@ void* combo_threadfunc(void* vmaf_thread_data)
 
         if (frm_idx == 0)
         {
+#if BUF_OPT_ENABLE				
+            // Allocating the free buffers from buffer array
+            blur_buf    = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx);
+            ref_buf     = get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx);
+            dis_buf     = get_free_blur_buf_slot(&thread_data->dis_buf_array, frm_idx);
+		
+            if((NULL == blur_buf) || (NULL == ref_buf) || (NULL == dis_buf))
+            {
+#ifdef MULTI_THREADING
+                thread_data->stop_threads = 1;			
+                sprintf(errmsg, "No free slot found for buffer allocation.\n");
+                pthread_mutex_unlock(&thread_data->mutex_readframe);
+#endif
+                goto fail_or_end;
+            }
+#endif
+
             // read frame from file
 
             ret = thread_data->read_frame(ref_buf, dis_buf, temp_buf, stride, user_data);
@@ -208,13 +231,31 @@ void* combo_threadfunc(void* vmaf_thread_data)
             convolution_f32_c(FILTER_5, 5, ref_buf, blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float));
 
 #ifdef MULTI_THREADING
+#if !BUF_OPT_ENABLE
             put_blur_buf(&thread_data->blur_buf_array, frm_idx, blur_buf);
+#endif
 #endif
 
         }
 #ifdef MULTI_THREADING
         else
         {
+#if BUF_OPT_ENABLE
+            // retrieve from buffer array
+            ref_buf     = get_blur_buf(&thread_data->ref_buf_array, frm_idx);
+            dis_buf     = get_blur_buf(&thread_data->dis_buf_array, frm_idx);
+            blur_buf    = get_blur_buf(&thread_data->blur_buf_array, frm_idx);
+
+            if((NULL == ref_buf) || (NULL == dis_buf) || (NULL == blur_buf))
+            {
+#ifdef MULTI_THREADING
+                thread_data->stop_threads = 1;
+                sprintf(errmsg, "Data not available.\n");
+                pthread_mutex_unlock(&thread_data->mutex_readframe);
+#endif
+                goto fail_or_end;
+            }
+#else
             // retrieve from buffer array
 
             ref_buf_ = get_blur_buf(&thread_data->ref_buf_array, frm_idx);
@@ -228,6 +269,22 @@ void* combo_threadfunc(void* vmaf_thread_data)
             blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx);
             memcpy(blur_buf, blur_buf_, data_sz);
             // don't releave blur_buf_array of frm_idx yet, since it will be used by the next frame again
+#endif			
+        }
+#endif
+
+#if BUF_OPT_ENABLE
+        // Allocate free buffer from the buffer array for next frame index
+        next_ref_buf 	= get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx + 1);
+        next_dis_buf 	= get_free_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1);
+        if((NULL == next_ref_buf) || (NULL == next_dis_buf))
+        {
+#ifdef MULTI_THREADING
+            thread_data->stop_threads = 1;
+            sprintf(errmsg, "No free slot found for next buffer.\n");
+            pthread_mutex_unlock(&thread_data->mutex_readframe);
+#endif
+            goto fail_or_end;
         }
 #endif
 
@@ -252,12 +309,25 @@ void* combo_threadfunc(void* vmaf_thread_data)
             next_frame_read = true;
         }
 
+#if !BUF_OPT_ENABLE		
 #ifdef MULTI_THREADING
         pthread_mutex_unlock(&thread_data->mutex_readframe);
+#endif
 #endif
 
         if (next_frame_read)
         {
+#if BUF_OPT_ENABLE			
+            next_blur_buf     = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx + 1);
+            if(NULL == next_blur_buf)
+            {
+#ifdef MULTI_THREADING
+                thread_data->stop_threads = 1;
+                sprintf(errmsg, "No free slot found for blur buffer.\n");
+#endif
+                goto fail_or_end;
+            }
+#endif
             // ===============================================================
             // offset pixel by OPT_RANGE_PIXEL_OFFSET
             // ===============================================================
@@ -272,14 +342,24 @@ void* combo_threadfunc(void* vmaf_thread_data)
             // ===============================================================
             convolution_f32_c(FILTER_5, 5, next_ref_buf, next_blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float));
 
+#if !BUF_OPT_ENABLE					
 #ifdef MULTI_THREADING
             // save next_ref_buf, next_ref_buf and next_ref_buf to buffer array
             put_blur_buf(&thread_data->ref_buf_array, frm_idx + 1, next_ref_buf);
             put_blur_buf(&thread_data->dis_buf_array, frm_idx + 1, next_dis_buf);
             put_blur_buf(&thread_data->blur_buf_array, frm_idx + 1, next_blur_buf);
+#endif
 #endif
         }
 
+#if BUF_OPT_ENABLE
+        // release ref and dis buffer references after blur buf computation
+        release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx + 1);
+        release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx + 1);
+#ifdef MULTI_THREADING
+        pthread_mutex_unlock(&thread_data->mutex_readframe);
+#endif
+#endif
         dbg_printf("frame: %d, ", frm_idx);
 
         // ===============================================================
@@ -287,9 +367,18 @@ void* combo_threadfunc(void* vmaf_thread_data)
         // step they have been offset by OPT_RANGE_PIXEL_OFFSET, now
         // offset them back.
         // ===============================================================
+#if BUF_OPT_ENABLE
+        // offset back the buffers only if required
+        if (frm_idx % n_subsample == 0 && ( (thread_data->psnr_array != NULL) || (thread_data->ssim_array != NULL) || (thread_data->ms_ssim_array != NULL) ))
+        {
+            offset_image(ref_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+            offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+            offset_flag = true;
+		}
+#else
         offset_image(ref_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
         offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
-
+#endif
         if (frm_idx % n_subsample == 0 && thread_data->psnr_array != NULL)
         {
             /* =========== psnr ============== */
@@ -338,8 +427,17 @@ void* combo_threadfunc(void* vmaf_thread_data)
         // ===============================================================
         // for the rest, offset pixel by OPT_RANGE_PIXEL_OFFSET
         // ===============================================================
+#if BUF_OPT_ENABLE
+        if(offset_flag)
+        {
+            offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+            offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+            offset_flag = false;
+		}
+#else		
         offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
         offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+#endif
 
         /* =========== adm ============== */
         if (frm_idx % n_subsample == 0)
@@ -421,8 +519,19 @@ void* combo_threadfunc(void* vmaf_thread_data)
             else
             {
 #ifdef MULTI_THREADING
+#if BUF_OPT_ENABLE
+                // avoid multiple memory copies
+                prev_blur_buf = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
+                if(NULL == prev_blur_buf)
+                {
+                    thread_data->stop_threads = 1;
+                    sprintf(errmsg, "Data not available for prev_blur_buf.\n");
+                    goto fail_or_end;
+                }
+#else				
                 prev_blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
                 memcpy(prev_blur_buf, prev_blur_buf_, data_sz);
+#endif
 #endif
                 if ((ret = compute_motion(prev_blur_buf, blur_buf, w, h, stride, stride, &score)))
                 {
@@ -430,7 +539,11 @@ void* combo_threadfunc(void* vmaf_thread_data)
                     goto fail_or_end;
                 }
 #ifdef MULTI_THREADING
+#if BUF_OPT_ENABLE
+                release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx - 1);
+#else
                 release_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
+#endif
 #endif
 
                 if (next_frame_read)
@@ -446,7 +559,9 @@ void* combo_threadfunc(void* vmaf_thread_data)
                 {
                     score2 = score;
 #ifdef MULTI_THREADING
+#if !BUF_OPT_ENABLE
                     release_blur_buf(&thread_data->blur_buf_array, frm_idx); // no more next frames, release this one too
+#endif
 #endif
                 }
             }
@@ -458,6 +573,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
             insert_array_at(thread_data->motion2_array, score2, frm_idx);
 
         }
+#if !BUF_OPT_ENABLE
         else
         {
 #ifdef MULTI_THREADING
@@ -473,6 +589,12 @@ void* combo_threadfunc(void* vmaf_thread_data)
             }
 #endif
         }
+#else
+        /* Indicate that motion score computation for this frame is complete */
+        insert_array_at(thread_data->motion_score_compute_flag_array, 1.0, frm_idx);
+        release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx + 1);
+#endif
+
         /* =========== vif ============== */
 
         if (frm_idx % n_subsample == 0)
@@ -508,12 +630,60 @@ void* combo_threadfunc(void* vmaf_thread_data)
 
         dbg_printf("\n");
 
+#if BUF_OPT_ENABLE
+        //Release references to reference and distorted buffers
+        release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx);
+        release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx);
+        release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx);
+        /*Loop through the slots and release slots if there are no more
+          reference till the current index. Not releasing next frame as
+          it may be required for the next loop						   */
+        for(int i = 0; i <= frm_idx; i++)
+        {
+            int ref_reference_count = get_blur_buf_reference_count(&thread_data->ref_buf_array, i);
+            int dis_reference_count = get_blur_buf_reference_count(&thread_data->dis_buf_array, i);
+
+            if((ref_reference_count == 0) && (dis_reference_count == 0))
+            {
+                release_blur_buf_slot(&thread_data->ref_buf_array, i);
+                release_blur_buf_slot(&thread_data->dis_buf_array, i);
+            }
+        }
+
+        /* Loop through the blur buffer array and release slots only till current index - 1 */
+        /* Only for those whose reference counter is zero */
+        for(int i = 0; i <= (frm_idx - 1); i++)
+        {
+            int reference_count = get_blur_buf_reference_count(&thread_data->blur_buf_array, i);
+            if(reference_count == 0)
+            {
+                /* Release buffer only if motion score is computed for current, previous and next frame */
+                if(
+                    (get_at(thread_data->motion_score_compute_flag_array, i)) &&
+                    (get_at(thread_data->motion_score_compute_flag_array, i + 1)) &&
+                    ((i == 0) || (get_at(thread_data->motion_score_compute_flag_array, i - 1)))
+                    )
+                {
+                    release_blur_buf_slot(&thread_data->blur_buf_array, i);
+                }
+            }
+        }
+
+        /* If this is the last frame then release any subsequent slots */
+        if (!next_frame_read)
+        {
+            release_blur_buf_slot(&thread_data->ref_buf_array, frm_idx + 1);
+            release_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1);
+            release_blur_buf_slot(&thread_data->blur_buf_array, frm_idx);
+        }
+#else	
 #ifndef MULTI_THREADING
         // copy to prev_buf
         memcpy(prev_blur_buf, blur_buf, data_sz);
         memcpy(ref_buf, next_ref_buf, data_sz);
         memcpy(dis_buf, next_dis_buf, data_sz);
         memcpy(blur_buf, next_blur_buf, data_sz);
+#endif
 #endif
 
         if (!next_frame_read)
@@ -528,6 +698,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
 
 fail_or_end:
 
+#if !BUF_OPT_ENABLE
     aligned_free(ref_buf);
     aligned_free(dis_buf);
     aligned_free(prev_blur_buf);
@@ -535,6 +706,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
     aligned_free(next_dis_buf);
     aligned_free(next_blur_buf);
     aligned_free(blur_buf);
+#endif	
     aligned_free(temp_buf);
 
 #ifdef MULTI_THREADING
@@ -616,6 +788,12 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     combo_thread_data.stop_threads = 0;
     combo_thread_data.n_subsample = n_subsample;
 
+#if BUF_OPT_ENABLE
+    DArray	motion_score_compute_flag_array;
+    init_array(&motion_score_compute_flag_array, 1000);
+    combo_thread_data.motion_score_compute_flag_array = &motion_score_compute_flag_array;
+#endif
+
     // sanity check for width/height
     if (w <= 0 || h <= 0 || (size_t)w > ALIGN_FLOOR(INT_MAX) / sizeof(float))
     {
@@ -649,10 +827,22 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     }
 
     // for motion analysis we compare to previous buffer and next buffer
+#if BUF_OPT_ENABLE
+    /*
+     *	In the multi-thread mode, allocate a fixed size buffer pool for the reference, distorted and blur buffers.
+     *	At any point, the no. of required ref and dis buffers is 1 more than the total no. of allotted threads,
+        to accomodate reading the next frame index.
+     *	At any point, one thread operates on the current, previous and next blur buffers, and hence, the no. of
+        required blur buffers will be three times the total no. of allotted threads.
+     */
+    init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN);
+    init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN);
+    init_blur_array(&combo_thread_data.blur_buf_array, 3 * (combo_thread_data.thread_count), combo_thread_data.data_sz, MAX_ALIGN);
+#else	
     init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN);
     init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN);
     init_blur_array(&combo_thread_data.blur_buf_array, combo_thread_data.thread_count + 2, combo_thread_data.data_sz, MAX_ALIGN);
-
+#endif
     // initialize the mutex that protects the read_frame function
     pthread_mutex_init(&combo_thread_data.mutex_readframe, NULL);
 
@@ -663,9 +853,9 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
 
     // start threads
     int t;
-	int numThread = combo_thread_data.thread_count;
-	pthread_t* thread = (pthread_t*)calloc(numThread, sizeof(pthread_t));
-	memset(thread, 0, numThread * sizeof(pthread_t));
+    int numThread = combo_thread_data.thread_count;
+    pthread_t* thread = (pthread_t*)calloc(numThread, sizeof(pthread_t));
+    memset(thread, 0, numThread * sizeof(pthread_t));
 
     for (t=0; t < combo_thread_data.thread_count; t++)
     {
@@ -691,7 +881,12 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     free_blur_buf(&combo_thread_data.dis_buf_array);
     free_blur_buf(&combo_thread_data.blur_buf_array);
 
-	free(thread);
+#if BUF_OPT_ENABLE	
+    free_array(&motion_score_compute_flag_array);
+#endif
+
+    free(thread);
+
     return 0;
 }
 
diff --git a/wrapper/src/combo.h b/wrapper/src/combo.h
index a0558e500..1b335cc64 100644
--- a/wrapper/src/combo.h
+++ b/wrapper/src/combo.h
@@ -78,6 +78,9 @@ typedef struct
     BLUR_BUF_ARRAY blur_buf_array;
     BLUR_BUF_ARRAY ref_buf_array;
     BLUR_BUF_ARRAY dis_buf_array;
+#if BUF_OPT_ENABLE
+	DArray *motion_score_compute_flag_array;
+#endif
 #endif
     int ret;
 
diff --git a/wrapper/src/darray.c b/wrapper/src/darray.c
index c9ab7c6aa..a0fcf0a1e 100644
--- a/wrapper/src/darray.c
+++ b/wrapper/src/darray.c
@@ -18,10 +18,14 @@
 
 #include <stdlib.h>
 #include "darray.h"
+#include "common/blur_array.h"
 
 void init_array(DArray *a, size_t init_size)
 {
     a->array = (double *)malloc(init_size * sizeof(double));
+#if BUF_OPT_ENABLE	
+	memset(a->array, 0.0, init_size * sizeof(double));
+#endif
     a->used = 0;
     a->size = init_size;
 #ifdef MULTI_THREADING
@@ -37,6 +41,12 @@ void insert_array(DArray *a, double e)
     if (a->used == a->size)
     {
         a->size *= 2;
         a->array = (double *)realloc(a->array, a->size * sizeof(double));
+#if BUF_OPT_ENABLE
+		/* zero only the newly added upper half; this has to follow realloc(), which supplies that memory */
+		double *temp = a->array;
+		temp += (a->size / 2);
+		memset(temp, 0.0, (a->size / 2) * sizeof(double));
+#endif
     }
     a->array[a->used++] = e;
@@ -59,6 +69,12 @@ void insert_array_at(DArray *a, double e, int pos)
     {
         a->size *= 2;
         a->array = (double *)realloc(a->array, a->size * sizeof(double));
+#if BUF_OPT_ENABLE
+		double *temp;
+		temp = a->array;
+		temp += (a->size / 2);
+		memset(temp, 0.0, (a->size / 2) * sizeof(double));
+#endif
     }
     a->array[pos] = e;
 #ifdef MULTI_THREADING
diff --git a/wrapper/src/libvmaf.h b/wrapper/src/libvmaf.h
index cffdef4e2..22dbc039f 100644
--- a/wrapper/src/libvmaf.h
+++ b/wrapper/src/libvmaf.h
@@ -19,6 +19,15 @@
 #ifndef LIBVMAF_H_
 #define LIBVMAF_H_
 
+#ifndef WINCE
+#define TIME_TEST_ENABLE 		1 // 1: add the execution time ("timeTaken") to the xml log, 0: disable
+#define MEM_LEAK_TEST_ENABLE 	0 // 1: enable the memory leak test (repeated runs with memory usage printed), 0: disable
+#else
+// When WINCE is defined (Windows), the memory leak test and execution time test cases are not handled.
+#define TIME_TEST_ENABLE 0
+#define MEM_LEAK_TEST_ENABLE 0
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/wrapper/src/main.cpp b/wrapper/src/main.cpp
index c42879e04..7c714d331 100644
--- a/wrapper/src/main.cpp
+++ b/wrapper/src/main.cpp
@@ -56,6 +56,54 @@ void print_usage(int argc, char *argv[])
     fprintf(stderr, "n_subsample:\n\tn indicates computing on one of every n frames (default 1)\n\n");
 }
 
+#if MEM_LEAK_TEST_ENABLE
+/*
+ * Prints the current/peak resident (VmRSS/VmHWM) and virtual (VmSize/VmPeak) memory
+ * of this process, in kB, as read from /proc/self/status (linux only); state 1 prints "start", else "end".
+ */
+void getMemory(int itr_ctr, int state)
+{
+    // zero-initialised so that a field missing from the status file is not printed as garbage
+    int currRealMem = 0;
+    int peakRealMem = 0;
+    int currVirtMem = 0;
+    int peakVirtMem = 0;
+    const char *state_str = (state == 1) ? "start" : "end";
+    // stores each word in status file
+    char buffer[1024] = "";
+    // linux file contains this-process info
+    FILE* file = fopen("/proc/self/status", "r");
+    if (file == NULL)
+    {
+        // cannot open the status file: report it instead of handing NULL to fscanf/fclose
+        fprintf(stderr, "getMemory: cannot open /proc/self/status\n");
+        return;
+    }
+    // read the entire file, one whitespace-separated word at a time
+    while (fscanf(file, " %1023s", buffer) == 1)
+    {
+        if (strcmp(buffer, "VmRSS:") == 0)
+        {
+            fscanf(file, " %d", &currRealMem);
+        }
+        if (strcmp(buffer, "VmHWM:") == 0)
+        {
+            fscanf(file, " %d", &peakRealMem);
+        }
+        if (strcmp(buffer, "VmSize:") == 0)
+        {
+            fscanf(file, " %d", &currVirtMem);
+        }
+        if (strcmp(buffer, "VmPeak:") == 0)
+        {
+            fscanf(file, " %d", &peakVirtMem);
+        }
+    }
+    fclose(file);
+    printf("Iteration %d at %s of process: currRealMem: %6d, peakRealMem: %6d, currVirtMem: %6d, peakVirtMem: %6d\n",itr_ctr, state_str, currRealMem, peakRealMem, currVirtMem, peakVirtMem);
+}
+#endif
+
 int run_wrapper(char *fmt, int width, int height, char *ref_path, char *dis_path, char *model_path,
         char *log_path, char *log_fmt, bool disable_clip, bool disable_avx, bool enable_transform, bool phone_model,
         bool do_psnr, bool do_ssim, bool do_ms_ssim, char *pool_method, int n_thread, int n_subsample, bool enable_conf_interval)
@@ -154,7 +202,10 @@ int main(int argc, char *argv[])
     int n_subsample = 1;
     bool enable_conf_interval = false;
     char *temp;
-
+#if MEM_LEAK_TEST_ENABLE	
+	int itr_ctr;
+	int ret = 0;
+#endif
     /* Check parameters */
 
     if (argc < 7)
@@ -288,9 +339,20 @@ int main(int argc, char *argv[])
 
     try
     {
+#if MEM_LEAK_TEST_ENABLE
+		for(itr_ctr=0;itr_ctr<1000;itr_ctr++)
+		{
+			getMemory(itr_ctr,1);
+			ret = run_wrapper(fmt, width, height, ref_path, dis_path, model_path,
+                log_path, log_fmt, disable_clip, disable_avx, enable_transform, phone_model,
+                do_psnr, do_ssim, do_ms_ssim, pool_method, n_thread, n_subsample, enable_conf_interval);
+			getMemory(itr_ctr,2);
+		}
+#else
         return run_wrapper(fmt, width, height, ref_path, dis_path, model_path,
                 log_path, log_fmt, disable_clip, disable_avx, enable_transform, phone_model,
                 do_psnr, do_ssim, do_ms_ssim, pool_method, n_thread, n_subsample, enable_conf_interval);
+#endif
     }
     catch (const std::exception &e)
     {
diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index 8f50c47b4..a26c3a57f 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -25,6 +25,7 @@
 #include <sstream>
 #include <cmath>
 #include <iomanip>
+#include "libvmaf.h"
 
 #include "vmaf.h"
 #include "combo.h"
@@ -1003,6 +1004,9 @@ double RunVmaf(const char* fmt, int width, int height,
     size_t num_frames_subsampled = result.get_scores("vmaf").size();
     double aggregate_vmaf = result.get_score("vmaf");
     double exec_fps = (double)num_frames_subsampled * n_subsample / (double)timer.elapsed();
+#if TIME_TEST_ENABLE
+	double time_taken = (double)timer.elapsed();
+#endif
     printf("Exec FPS: %f\n", exec_fps);
 
     std::vector<std::string> result_keys = result.get_keys();
@@ -1138,6 +1142,9 @@ double RunVmaf(const char* fmt, int width, int height,
         if (aggregate_ms_ssim)
             info_node.append_attribute("aggregateMS_SSIM") = aggregate_ms_ssim;
         info_node.append_attribute("execFps") = exec_fps;
+#if TIME_TEST_ENABLE
+		info_node.append_attribute("timeTaken") = time_taken;
+#endif
 
         auto frames_node = xml_root.append_child("frames");
         for (size_t i_subsampled=0; i_subsampled<num_frames_subsampled; i_subsampled++)

From 7579dba2b95e8d2f01577f598707643d6de5400c Mon Sep 17 00:00:00 2001
From: fishjam <fishjam@163.com>
Date: Wed, 30 Jan 2019 09:11:40 +0800
Subject: [PATCH 11/29] Netflix/vmaf/#286 (#293)

* Netflix/vmaf/#286

refactor to provide Result in libvmaf.h

* fix bug: crash when predictionStructs is empty

* format code and activate the CI
---
 wrapper/src/libvmaf.cpp | 271 +++++++++++++++++++++++++++++++++++-----
 wrapper/src/libvmaf.h   |  79 ++++++++++++
 wrapper/src/vmaf.cpp    |  15 +--
 wrapper/src/vmaf.h      | 156 +----------------------
 4 files changed, 324 insertions(+), 197 deletions(-)

diff --git a/wrapper/src/libvmaf.cpp b/wrapper/src/libvmaf.cpp
index 4b7672a76..e8e09de30 100644
--- a/wrapper/src/libvmaf.cpp
+++ b/wrapper/src/libvmaf.cpp
@@ -21,41 +21,245 @@
 #include <cstdio>
 #include "cpu.h"
 
+Asset::Asset(int w, int h, const char *fmt)
+    :w(w), h(h), fmt(fmt) 
+{
+}
+
+Asset::Asset(int w, int h) 
+    :w(w), h(h), fmt("yuv420p") 
+{
+}
+
+int Asset::getWidth()
+{ 
+    return w; 
+}
+
+int Asset::getHeight()
+{ 
+    return h; 
+}
+
+const char* Asset::getFmt()
+{ 
+    return fmt; 
+}
+
+StatVector::StatVector() 
+{
+}
+
+StatVector::StatVector(std::vector<double> l) : l(l) 
+{
+}
+
+std::vector<double> StatVector::getVector()
+{
+    return l;
+}
+
+double StatVector::mean()
+{
+    _assert_size();
+    double sum = 0.0;
+    for (double e : l)
+    {
+        sum += e;
+    }
+    return sum / l.size();
+}
+
+double StatVector::minimum()
+{
+    _assert_size();
+    double min_ = l[0];
+    for (double e : l)
+    {
+        if (e < min_)
+        {
+            min_ = e;
+        }
+    }
+    return min_;
+}
+
+double StatVector::harmonic_mean()
+{
+    _assert_size();
+    double sum = 0.0;
+    for (double e : l)
+    {
+        sum += 1.0 / (e + 1.0);
+    }
+    return 1.0 / (sum / l.size()) - 1.0;
+}
+
+double StatVector::second_moment()
+{
+    _assert_size();
+    double sum = 0.0;
+    for (double e : l)
+    {
+        sum += pow(e, 2);
+    }
+    return sum / l.size();
+}
+
+double StatVector::percentile(double perc)
+{
+    _assert_size();
+    if (perc < 0.0) {
+        perc = 0.0;
+    }
+    else if (perc > 100.0) {
+        perc = 100.0;
+    }
+    std::vector<double> l(this->l);
+    std::sort(l.begin(), l.end());
+    double pos = perc * (this->l.size() - 1) / 100.0;
+    int pos_left = (int)floor(pos);
+    int pos_right = (int)ceil(pos);
+    if (pos_left == pos_right) {
+        return l[pos_left];
+    }
+    else {
+        return l[pos_left] * (pos_right - pos) + l[pos_right] * (pos - pos_left);
+    }
+
+}
+
+double StatVector::var()
+{ 
+    return second_moment() - pow(mean(), 2); 
+}
+
+double StatVector::std()
+{ 
+    return sqrt(var()); 
+}
+
+void StatVector::append(double e)
+{ 
+    l.push_back(e); 
+}
+double StatVector::at(size_t idx)
+{ 
+    return l.at(idx); 
+}
+
+size_t StatVector::size()
+{ 
+    return l.size(); 
+}
+
+void StatVector::_assert_size()
+{
+    if (l.size() == 0) {
+        throw std::runtime_error("StatVector size is 0.");
+    }
+}
+
+Result::Result() : score_aggregate_method(ScoreAggregateMethod::MEAN)
+{
+}
+
+void Result::set_scores(const std::string &key, const StatVector &scores)
+{
+    d[key] = scores; 
+}
+
+StatVector Result::get_scores(const std::string &key)
+{ 
+    return d[key]; 
+}
+
+bool Result::has_scores(const std::string &key)
+{
+    return d.find(key) != d.end(); 
+}
+
+double Result::get_score(const std::string &key)
+{
+    StatVector list = get_scores(key);
+    if (score_aggregate_method == ScoreAggregateMethod::MINIMUM)
+    {
+        return list.minimum();
+    }
+    else if (score_aggregate_method == ScoreAggregateMethod::HARMONIC_MEAN)
+    {
+        return list.harmonic_mean();
+    }
+    else // MEAN
+    {
+        return list.mean();
+    }
+}
+
+std::vector<std::string> Result::get_keys()
+{
+    std::vector<std::string> v;
+    for (std::map<std::string, StatVector>::iterator it = d.begin(); it != d.end(); ++it)
+    {
+        v.push_back(it->first);
+    }
+    return v;
+}
+
+void Result::setScoreAggregateMethod(ScoreAggregateMethod scoreAggregateMethod)
+{
+    score_aggregate_method = scoreAggregateMethod;
+}
+
+std::unique_ptr<IVmafQualityRunner> 
+VmafQualityRunnerFactory::createVmafQualityRunner(const char *model_path, bool enable_conf_interval) {
+    std::unique_ptr<IVmafQualityRunner> runner_ptr;
+    if (enable_conf_interval)
+    {
+        runner_ptr = std::unique_ptr<BootstrapVmafQualityRunner>(new BootstrapVmafQualityRunner(model_path));
+    }
+    else
+    {
+        runner_ptr = std::unique_ptr<VmafQualityRunner>(new VmafQualityRunner(model_path));
+    }
+    return runner_ptr;
+}
+
 extern "C" {
 
-enum vmaf_cpu cpu; // global
-
-int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride_byte, void *user_data),
-				 void *user_data, char *model_path, char *log_path, char *log_fmt, int disable_clip, int disable_avx, int enable_transform, int phone_model, int do_psnr,
-				 int do_ssim, int do_ms_ssim, char *pool_method, int n_thread, int n_subsample, int enable_conf_interval)
-	{
-		bool d_c = false;
-		bool d_a = false;
-		bool e_t = false;
-		bool d_p = false;
-		bool d_s = false;
-		bool d_m_s = false;	
-
-		if(enable_transform || phone_model){
-			e_t = true;
-		}
-		if(disable_clip){
-			d_c = true;
-		}
-		if(disable_avx){
-			d_a = true;
-		}
-		if(do_psnr){
-			d_p = true;
-		}
-		if(do_ssim){
-			d_s = true;
-		}
-		if(do_ms_ssim){
-			d_m_s = true;
-		}
-		
-		cpu = cpu_autodetect();
+    enum vmaf_cpu cpu; // global
+
+    int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int(*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride_byte, void *user_data),
+        void *user_data, char *model_path, char *log_path, char *log_fmt, int disable_clip, int disable_avx, int enable_transform, int phone_model, int do_psnr,
+        int do_ssim, int do_ms_ssim, char *pool_method, int n_thread, int n_subsample, int enable_conf_interval)
+    {
+        bool d_c = false;
+        bool d_a = false;
+        bool e_t = false;
+        bool d_p = false;
+        bool d_s = false;
+        bool d_m_s = false;
+
+        if (enable_transform || phone_model) {
+            e_t = true;
+        }
+        if (disable_clip) {
+            d_c = true;
+        }
+        if (disable_avx) {
+            d_a = true;
+        }
+        if (do_psnr) {
+            d_p = true;
+        }
+        if (do_ssim) {
+            d_s = true;
+        }
+        if (do_ms_ssim) {
+            d_m_s = true;
+        }
+
+        cpu = cpu_autodetect();
 
         if (disable_avx)
         {
@@ -83,5 +287,4 @@ int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int (*rea
             return -4;
         }
     }
-
 }
diff --git a/wrapper/src/libvmaf.h b/wrapper/src/libvmaf.h
index 22dbc039f..71c82f69e 100644
--- a/wrapper/src/libvmaf.h
+++ b/wrapper/src/libvmaf.h
@@ -40,4 +40,83 @@ int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int (*rea
 }
 #endif
 
+#ifdef __cplusplus
+#include <vector>
+#include <cstring>
+#include <map>
+#include <memory>
+
+class Asset
+{
+public:
+    Asset(int w, int h, const char *fmt);
+    Asset(int w, int h);
+    int getWidth();
+    int getHeight();
+    const char* getFmt();
+private:
+    const int w, h;
+    const char *fmt;
+};
+
+enum ScoreAggregateMethod
+{
+    MEAN,
+    HARMONIC_MEAN,
+    MINIMUM
+};
+
+class StatVector
+{
+public:
+    StatVector();
+    StatVector(std::vector<double> l);
+    std::vector<double> getVector();
+    double mean();
+    double minimum();
+    double harmonic_mean();
+    double second_moment();
+    double percentile(double perc);
+    double var();
+    double std();
+    void append(double e);
+    double at(size_t idx);
+    size_t size();
+private:
+    std::vector<double> l;
+    void _assert_size();
+};
+
+
+class Result
+{
+public:
+    Result();
+    void set_scores(const std::string &key, const StatVector &scores);
+    StatVector get_scores(const std::string &key);
+    bool has_scores(const std::string &key);
+    double get_score(const std::string &key);
+    std::vector<std::string> get_keys();
+    void setScoreAggregateMethod(ScoreAggregateMethod scoreAggregateMethod);
+private:
+    std::map<std::string, StatVector> d;
+    ScoreAggregateMethod score_aggregate_method;
+};
+
+class IVmafQualityRunner {
+public:
+    virtual Result run(Asset asset, int(*read_frame)(float *ref_data, float *main_data, float *temp_data,
+        int stride, void *user_data), void *user_data, bool disable_clip, bool enable_transform,
+        bool do_psnr, bool do_ssim, bool do_ms_ssim, int n_thread, int n_subsample) = 0;
+    virtual ~IVmafQualityRunner() {}
+};
+
+class VmafQualityRunnerFactory {
+public:
+    static std::unique_ptr<IVmafQualityRunner> 
+        createVmafQualityRunner(const char *model_path, bool enable_conf_interval);
+};
+
+#endif
+
 #endif /* _LIBVMAF_H */
diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index a26c3a57f..5972d9c99 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -929,7 +929,10 @@ void BootstrapVmafQualityRunner::_set_prediction_result(
     result.set_scores("ci95_high", ci95HighScore);
 
     // num_models is same across frames, so just use first frame length
-    size_t num_models = predictionStructs.at(0).vmafMultiModelPrediction.size();
+    size_t num_models = 0; 
+    if (predictionStructs.size() > 0) {
+        num_models = predictionStructs.at(0).vmafMultiModelPrediction.size();
+    }
     std::vector<double> perModelScore;
     // name of the vmaf bootstrap model, e.g. vmaf_0001 is the first one
 
@@ -972,15 +975,7 @@ double RunVmaf(const char* fmt, int width, int height,
     }
 
     Asset asset(width, height, fmt);
-    std::unique_ptr<VmafQualityRunner> runner_ptr;
-    if (enable_conf_interval)
-    {
-        runner_ptr = std::unique_ptr<BootstrapVmafQualityRunner>(new BootstrapVmafQualityRunner(model_path));
-    }
-    else
-    {
-        runner_ptr = std::unique_ptr<VmafQualityRunner>(new VmafQualityRunner(model_path));
-    }
+    std::unique_ptr<IVmafQualityRunner> runner_ptr = VmafQualityRunnerFactory::createVmafQualityRunner(model_path, enable_conf_interval);
 
     Timer timer;
     timer.start();
diff --git a/wrapper/src/vmaf.h b/wrapper/src/vmaf.h
index 0a1a934d6..9a710052c 100644
--- a/wrapper/src/vmaf.h
+++ b/wrapper/src/vmaf.h
@@ -35,6 +35,7 @@
 #include "svm.h"
 #include "chooseser.h"
 #include "darray.h"
+#include "libvmaf.h"
 
 static const std::string BOOSTRAP_VMAF_MODEL_PREFIX = "vmaf_";
 
@@ -45,157 +46,6 @@ double RunVmaf(const char* fmt, int width, int height,
                bool do_psnr, bool do_ssim, bool do_ms_ssim,
                const char *pool_method, int n_thread, int n_subsample, bool enable_conf_interval);
 
-class Asset
-{
-public:
-    Asset(int w, int h, const char *fmt):
-        w(w), h(h), fmt(fmt) {}
-    Asset(int w, int h):
-        w(w), h(h), fmt("yuv420p") {}
-    int getWidth() { return w; }
-    int getHeight() { return h; }
-    const char* getFmt() { return fmt; }
-private:
-    const int w, h;
-    const char *fmt;
-};
-
-class StatVector
-{
-public:
-    StatVector() {}
-    StatVector(std::vector<double> l): l(l) {}
-    std::vector<double> getVector()
-    {
-        return l;
-    }
-    double mean()
-    {
-        _assert_size();
-        double sum = 0.0;
-        for (double e : l)
-        {
-            sum += e;
-        }
-        return sum / l.size();
-    }
-    double minimum()
-    {
-        _assert_size();
-        double min_ = l[0];
-        for (double e : l)
-        {
-            if (e < min_)
-            {
-                min_ = e;
-            }
-        }
-        return min_;
-    }
-    double harmonic_mean()
-    {
-        _assert_size();
-        double sum = 0.0;
-        for (double e: l)
-        {
-            sum += 1.0 / (e + 1.0);
-        }
-        return 1.0 / (sum / l.size()) - 1.0;
-    }
-    double second_moment()
-    {
-        _assert_size();
-        double sum = 0.0;
-        for (double e : l)
-        {
-            sum += pow(e, 2);
-        }
-        return sum / l.size();
-    }
-    double percentile(double perc)
-    {
-        _assert_size();
-        if (perc < 0.0) {
-            perc = 0.0;
-        }
-        else if (perc > 100.0) {
-            perc = 100.0;
-        }
-        std::vector<double> l(this->l);
-        std::sort(l.begin(), l.end());
-        double pos = perc * (this->l.size() - 1) / 100.0;
-        int pos_left = (int)floor(pos);
-        int pos_right = (int)ceil(pos);
-        if (pos_left == pos_right) {
-            return l[pos_left];
-        }
-        else {
-            return l[pos_left] * (pos_right - pos) + l[pos_right] * (pos - pos_left);
-        }
-
-    }
-    double var() { return second_moment() - pow(mean(), 2); }
-    double std() { return sqrt(var()); }
-    void append(double e) { l.push_back(e); }
-    double at(size_t idx) { return l.at(idx); }
-    size_t size() { return l.size(); }
-private:
-    std::vector<double> l;
-    void _assert_size() {
-        if (l.size() == 0) {
-            throw std::runtime_error("StatVector size is 0.");
-        }
-    }
-};
-
-enum ScoreAggregateMethod
-{
-    MEAN,
-    HARMONIC_MEAN,
-    MINIMUM
-};
-
-class Result
-{
-public:
-    Result(): score_aggregate_method(ScoreAggregateMethod::MEAN) {}
-    void set_scores(const std::string &key, const StatVector &scores) { d[key] = scores; }
-    StatVector get_scores(const std::string &key) { return d[key]; }
-    bool has_scores(const std::string &key) { return d.find(key) != d.end(); }
-    double get_score(const std::string &key)
-    {
-        StatVector list = get_scores(key);
-        if (score_aggregate_method == ScoreAggregateMethod::MINIMUM)
-        {
-            return list.minimum();
-        }
-        else if (score_aggregate_method == ScoreAggregateMethod::HARMONIC_MEAN)
-        {
-            return list.harmonic_mean();
-        }
-        else // MEAN
-        {
-            return list.mean();
-        }
-    }
-    std::vector<std::string> get_keys()
-    {
-        std::vector<std::string> v;
-        for (std::map<std::string, StatVector>::iterator it = d.begin(); it != d.end(); ++it)
-        {
-            v.push_back(it->first);
-        }
-        return v;
-    }
-    void setScoreAggregateMethod(ScoreAggregateMethod scoreAggregateMethod)
-    {
-        score_aggregate_method = scoreAggregateMethod;
-    }
-private:
-    std::map<std::string, StatVector> d;
-    ScoreAggregateMethod score_aggregate_method;
-};
-
 class VmafException: public std::exception
 {
 public:
@@ -267,11 +117,11 @@ class BootstrapLibsvmNusvrTrainTestModel: public LibsvmNusvrTrainTestModel {
     virtual void _assert_model_type(Val model_type);
 };
 
-class VmafQualityRunner
+class VmafQualityRunner : public IVmafQualityRunner
 {
 public:
     VmafQualityRunner(const char *model_path): model_path(model_path) {}
-    Result run(Asset asset, int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
+    virtual Result run(Asset asset, int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
                int stride, void *user_data), void *user_data, bool disable_clip, bool enable_transform,
                bool do_psnr, bool do_ssim, bool do_ms_ssim, int n_thread, int n_subsample);
     virtual ~VmafQualityRunner() {}

From 179156201c269aafdae51c98e9aae202ed220427 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 13:23:27 -0800
Subject: [PATCH 12/29] Update FAQ.md and libvmaf.md with information on using
 libvmaf with FFmpeg.

---
 FAQ.md                  |  4 ++--
 README.md               |  2 +-
 resource/doc/libvmaf.md | 18 ++++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/FAQ.md b/FAQ.md
index 265b0ac2a..55aec45ff 100644
--- a/FAQ.md
+++ b/FAQ.md
@@ -56,12 +56,12 @@ A: This is due to the slightly different workflows used by `run_vmaf_training` a
 
 ### Q: How do I use VMAF with downscaled videos?
 
-If you have a distorted video that was scaled down (e.g. for adaptive streaming) and want to calculate VMAF, you can use ffmpeg with `libvmaf` to perform the re-scaling for you.
+If you have a distorted video that was scaled down (e.g. for adaptive streaming) and want to calculate VMAF, you can use FFmpeg with `libvmaf` to perform the re-scaling for you.
 
 For example, to upscale the distorted video to 1080p:
 
 ```
-ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920:1080[main];[main][1:v]libvmaf" -f null -
+ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null -
 ```
 
 This scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input video, `1:v`.
diff --git a/README.md b/README.md
index be7bc1eea..e9f7b7d4d 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ There are a number of ways one can use the package:
 
   - [VMAF Python library](resource/doc/VMAF_Python_library.md) offers full functionalities including running basic VMAF command line, running VMAF on a batch of video files, training and testing a VMAF model on video datasets, and visualization tools, etc.
   - [`vmafossexec` - a C++ "wrapper" executable](resource/doc/vmafossexec.md) offers running the prediction part of the algorithm in full, such that one can easily deploy VMAF in a production environment without needing to configure the Python dependencies. Additionally, `vmafossexec` offers a number of exclusive features, such as 1) speed optimization using multi-threading and skipping frames, 2) optionally computing PSNR, SSIM and MS-SSIM metrics in the output.
-  - [`libvmaf.a` - a static library](resource/doc/libvmaf.md) offers an interface to incorporate VMAF into your C/C++ code. Using this library, VMAF is now included as a filter in [FFmpeg](http://ffmpeg.org/) main branch, and can be configured using: `./configure --enable-libvmaf --enable-version3`. See [this](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for details. Using FFmpeg with `libvmaf` allows passing in compressed video bitstreams directly to VMAF.
+  - [`libvmaf.a` - a static library](resource/doc/libvmaf.md) offers an interface to incorporate VMAF into your C/C++ code. Using this library, VMAF is now included as a filter in [FFmpeg](http://ffmpeg.org/) main branch, and can be configured using: `./configure --enable-libvmaf --enable-version3`. See [this](resource/doc/libvmaf.md#use-libvmaf-with-ffmpeg) section for details. Using FFmpeg with `libvmaf` allows passing in compressed video bitstreams directly to VMAF.
   - [VMAF Dockerfile](Dockerfile) generates a VMAF docker image from the [VMAF Python library](resource/doc/VMAF_Python_library.md). Refer to [this](resource/doc/docker.md) document for detailed usages.
   - Build VMAF on Windows: follow instructions on [this](resource/doc/BuildForWindows.md) page.
 
diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md
index 3dd2c2874..6a5fc895c 100644
--- a/resource/doc/libvmaf.md
+++ b/resource/doc/libvmaf.md
@@ -41,3 +41,21 @@ To uninstall the library run:
 make uninstall
 ```
 
+### Use libvmaf with FFmpeg
+
+After installing `libvmaf.a`, you can use it with FFmpeg. Under FFmpeg directory, configure, build and install FFmpeg with:
+
+```
+./configure --enable-libvmaf --enable-version3
+make install
+```
+
+Using FFmpeg with libvmaf is very powerful, as you can create complex filters to calculate VMAF directly on videos of different encoding formats and resolutions. For the best practices of computing VMAF at the right resolution, refer to our [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12). Below is an example on how you can compare a downscaled video with its original 1080p source:
+
+```
+ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null -
+```
+
+Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods).
+
+See the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms.

From 9f85d5aae7235be5cd3f41ddfc8adc8ae78c70a0 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 13:25:55 -0800
Subject: [PATCH 13/29] Update FAQ.md and libvmaf.md with information on using
 libvmaf with FFmpeg.

---
 resource/doc/libvmaf.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md
index 6a5fc895c..46c3cd82f 100644
--- a/resource/doc/libvmaf.md
+++ b/resource/doc/libvmaf.md
@@ -53,7 +53,8 @@ make install
 Using FFmpeg with libvmaf is very powerful, as you can create complex filters to calculate VMAF directly on videos of different encoding formats and resolutions. For the best practices of computing VMAF at the right resolution, refer to our [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12). Below is an example on how you can compare a downscaled video with its original 1080p source:
 
 ```
-ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null -
+ffmpeg -i main.mpg -i ref.mpg -filter_complex \
+"[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null -
 ```
 
 Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods).

From fc6b9263c418a53ddc16eef0574d9f20f5c00b39 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 13:52:56 -0800
Subject: [PATCH 14/29] Remove adm related non-optimized code.

---
 feature/src/adm.c       | 234 +---------------------------
 feature/src/adm_tools.c | 333 ----------------------------------------
 feature/src/adm_tools.h |  10 +-
 3 files changed, 3 insertions(+), 574 deletions(-)

diff --git a/feature/src/adm.c b/feature/src/adm.c
index 1123d710c..8e9160b35 100644
--- a/feature/src/adm.c
+++ b/feature/src/adm.c
@@ -39,10 +39,8 @@ typedef adm_dwt_band_t_s adm_dwt_band_t;
 #define adm_sum_cube  adm_sum_cube_s
 #define offset_image       offset_image_s
 
-#if ADM_OPT_ENABLE
-    #define adm_csf_den_scale adm_csf_den_scale_s
-    #define dwt2_src_indices_filt dwt2_src_indices_filt_s
-#endif
+#define adm_csf_den_scale adm_csf_den_scale_s
+#define dwt2_src_indices_filt dwt2_src_indices_filt_s
 
 static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one)
 {
@@ -53,7 +51,6 @@ static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_o
     return data_top;
 }
 
-#if ADM_OPT_ENABLE
 static char *init_dwt_band_hvd(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one)
 {
 	band->band_a = NULL;
@@ -274,233 +271,6 @@ int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride
 	aligned_free(buf_x_orig);
 	return ret;
 }
-#else // ADM_OPT_ENABLE
-int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores, double border_factor)
-{
-#ifdef ADM_OPT_SINGLE_PRECISION
-    double numden_limit = 1e-2 * (w * h) / (1920.0 * 1080.0);
-#else
-    double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0);
-#endif
-    float *data_buf = 0;
-    char *data_top;
-
-    float *ref_scale;
-    float *dis_scale;
-
-    adm_dwt_band_t ref_dwt2;
-    adm_dwt_band_t dis_dwt2;
-
-    adm_dwt_band_t decouple_r;
-    adm_dwt_band_t decouple_a;
-
-    adm_dwt_band_t csf_o;
-    adm_dwt_band_t csf_r;
-    adm_dwt_band_t csf_a;
-
-    float *mta;
-
-    adm_dwt_band_t cm_r;
-
-    const float *curr_ref_scale = ref;
-    const float *curr_dis_scale = dis;
-    int curr_ref_stride = ref_stride;
-    int curr_dis_stride = dis_stride;
-
-    int orig_h = h;
-
-    int buf_stride = ALIGN_CEIL(((w + 1) / 2) * sizeof(float));
-    size_t buf_sz_one = (size_t)buf_stride * ((h + 1) / 2);
-
-    double num = 0;
-    double den = 0;
-
-    int scale;
-    int ret = 1;
-
-    if (SIZE_MAX / buf_sz_one < 35)
-    {
-        printf("error: SIZE_MAX / buf_sz_one < 35, buf_sz_one = %zu.\n", buf_sz_one);
-        fflush(stdout);
-        goto fail;
-    }
-
-    if (!(data_buf = aligned_malloc(buf_sz_one * 35, MAX_ALIGN)))
-    {
-        printf("error: aligned_malloc failed for data_buf.\n");
-        fflush(stdout);
-        goto fail;
-    }
-
-    data_top = (char *)data_buf;
-
-    ref_scale = (float *)data_top; data_top += buf_sz_one;
-    dis_scale = (float *)data_top; data_top += buf_sz_one;
-
-    data_top = init_dwt_band(&ref_dwt2, data_top, buf_sz_one);
-    data_top = init_dwt_band(&dis_dwt2, data_top, buf_sz_one);
-    data_top = init_dwt_band(&decouple_r, data_top, buf_sz_one);
-    data_top = init_dwt_band(&decouple_a, data_top, buf_sz_one);
-    data_top = init_dwt_band(&csf_o, data_top, buf_sz_one);
-    data_top = init_dwt_band(&csf_r, data_top, buf_sz_one);
-    data_top = init_dwt_band(&csf_a, data_top, buf_sz_one);
-
-    mta = (float *)data_top; data_top += buf_sz_one;
-
-    data_top = init_dwt_band(&cm_r, data_top, buf_sz_one);
-
-    for (scale = 0; scale < 4; ++scale) {
-#ifdef ADM_OPT_DEBUG_DUMP
-        char pathbuf[256];
-#endif
-        float num_scale = 0.0;
-        float den_scale = 0.0;
-
-        adm_dwt2(curr_ref_scale, &ref_dwt2, w, h, curr_ref_stride, buf_stride);
-        adm_dwt2(curr_dis_scale, &dis_dwt2, w, h, curr_dis_stride, buf_stride);
-
-        w = (w + 1) / 2;
-        h = (h + 1) / 2;
-
-        adm_decouple(&ref_dwt2, &dis_dwt2, &decouple_r, &decouple_a, w, h, buf_stride, buf_stride, buf_stride, buf_stride);
-
-        adm_csf(&ref_dwt2, &csf_o, orig_h, scale, w, h, buf_stride, buf_stride);
-        adm_csf(&decouple_r, &csf_r, orig_h, scale, w, h, buf_stride, buf_stride);
-        adm_csf(&decouple_a, &csf_a, orig_h, scale, w, h, buf_stride, buf_stride);
-
-        adm_cm_thresh(&csf_a, mta, w, h, buf_stride, buf_stride);
-        adm_cm(&csf_r, &cm_r, mta, w, h, buf_stride, buf_stride, buf_stride);
-
-#ifdef ADM_OPT_DEBUG_DUMP
-        sprintf(pathbuf, "stage/ref[%d]_a.yuv", scale);
-        write_image(pathbuf, ref_dwt2.band_a, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/ref[%d]_h.yuv", scale);
-        write_image(pathbuf, ref_dwt2.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/ref[%d]_v.yuv", scale);
-        write_image(pathbuf, ref_dwt2.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/ref[%d]_d.yuv", scale);
-        write_image(pathbuf, ref_dwt2.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/dis[%d]_a.yuv", scale);
-        write_image(pathbuf, dis_dwt2.band_a, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/dis[%d]_h.yuv", scale);
-        write_image(pathbuf, dis_dwt2.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/dis[%d]_v.yuv", scale);
-        write_image(pathbuf, dis_dwt2.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/dis[%d]_d.yuv", scale);
-        write_image(pathbuf, dis_dwt2.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/r[%d]_h.yuv", scale);
-        write_image(pathbuf, decouple_r.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/r[%d]_v.yuv", scale);
-        write_image(pathbuf, decouple_r.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/r[%d]_d.yuv", scale);
-        write_image(pathbuf, decouple_r.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/a[%d]_h.yuv", scale);
-        write_image(pathbuf, decouple_a.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/a[%d]_v.yuv", scale);
-        write_image(pathbuf, decouple_a.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/a[%d]_d.yuv", scale);
-        write_image(pathbuf, decouple_a.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_o[%d]_h.yuv", scale);
-        write_image(pathbuf, csf_o.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_o[%d]_v.yuv", scale);
-        write_image(pathbuf, csf_o.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_o[%d]_d.yuv", scale);
-        write_image(pathbuf, csf_o.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_r[%d]_h.yuv", scale);
-        write_image(pathbuf, csf_r.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_r[%d]_v.yuv", scale);
-        write_image(pathbuf, csf_r.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_r[%d]_d.yuv", scale);
-        write_image(pathbuf, csf_r.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_a[%d]_h.yuv", scale);
-        write_image(pathbuf, csf_a.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_a[%d]_v.yuv", scale);
-        write_image(pathbuf, csf_a.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/csf_a[%d]_d.yuv", scale);
-        write_image(pathbuf, csf_a.band_d, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/mta[%d].yuv", scale);
-        write_image(pathbuf, mta, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/cm_r[%d]_h.yuv", scale);
-        write_image(pathbuf, cm_r.band_h, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/cm_r[%d]_v.yuv", scale);
-        write_image(pathbuf, cm_r.band_v, w, h, buf_stride, sizeof(float));
-
-        sprintf(pathbuf, "stage/cm_r[%d]_d.yuv", scale);
-        write_image(pathbuf, cm_r.band_d, w, h, buf_stride, sizeof(float));
-#endif
-        num_scale += adm_sum_cube(cm_r.band_h, w, h, buf_stride, border_factor);
-        num_scale += adm_sum_cube(cm_r.band_v, w, h, buf_stride, border_factor);
-        num_scale += adm_sum_cube(cm_r.band_d, w, h, buf_stride, border_factor);
-
-        den_scale += adm_sum_cube(csf_o.band_h, w, h, buf_stride, border_factor);
-        den_scale += adm_sum_cube(csf_o.band_v, w, h, buf_stride, border_factor);
-        den_scale += adm_sum_cube(csf_o.band_d, w, h, buf_stride, border_factor);
-
-        num += num_scale;
-        den += den_scale;
-
-        /* Copy DWT2 approximation band to buffer for next scale. */
-        adm_buffer_copy(ref_dwt2.band_a, ref_scale, w * sizeof(float), h, buf_stride, buf_stride);
-        adm_buffer_copy(dis_dwt2.band_a, dis_scale, w * sizeof(float), h, buf_stride, buf_stride);
-
-        curr_ref_scale = ref_scale;
-        curr_dis_scale = dis_scale;
-        curr_ref_stride = buf_stride;
-        curr_dis_stride = buf_stride;
-#ifdef ADM_OPT_DEBUG_DUMP
-        printf("num: %f\n", num);
-        printf("den: %f\n", den);
-#endif
-        scores[2*scale+0] = num_scale;
-        scores[2*scale+1] = den_scale;
-    }
-
-    num = num < numden_limit ? 0 : num;
-    den = den < numden_limit ? 0 : den;
-
-    if (den == 0.0)
-    {
-        *score = 1.0f;
-    }
-    else
-    {
-        *score = num / den;
-    }
-    *score_num = num;
-    *score_den = den;
-
-    ret = 0;
-
-fail:
-    aligned_free(data_buf);
-    return ret;
-}
-#endif
 
 int adm(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, int w, int h, const char *fmt)
 {
diff --git a/feature/src/adm_tools.c b/feature/src/adm_tools.c
index 5cb57aecd..d5578c706 100644
--- a/feature/src/adm_tools.c
+++ b/feature/src/adm_tools.c
@@ -48,14 +48,12 @@ static float rcp_s(float x)
 static const float dwt2_db2_coeffs_lo_s[4] = { 0.482962913144690, 0.836516303737469, 0.224143868041857, -0.129409522550921 };
 static const float dwt2_db2_coeffs_hi_s[4] = { -0.129409522550921, -0.224143868041857, 0.836516303737469, -0.482962913144690 };
 
-#if ADM_OPT_ENABLE
 #ifndef FLOAT_ONE_BY_30
 #define FLOAT_ONE_BY_30	0.0333333351
 #endif
 
 #ifndef FLOAT_ONE_BY_15
 #define FLOAT_ONE_BY_15 0.0666666701
-#endif
 
 static const float fcoeff_cm_thresh_s[3][3] =
 {
@@ -93,7 +91,6 @@ float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_fac
     return powf(accum, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
 }
 
-#if ADM_OPT_ENABLE
 void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor)
 {
 #ifdef ADM_OPT_AVOID_ATAN
@@ -210,107 +207,7 @@ void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, co
 		}
 	}
 }
-#else
-void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride)
-{
-#ifdef ADM_OPT_AVOID_ATAN
-    const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);
-#endif
-    const float eps = 1e-30;
-
-    int ref_px_stride = ref_stride / sizeof(float);
-    int dis_px_stride = dis_stride / sizeof(float);
-    int r_px_stride = r_stride / sizeof(float);
-    int a_px_stride = a_stride / sizeof(float);
-
-    float oh, ov, od, th, tv, td;
-    float kh, kv, kd, tmph, tmpv, tmpd;
-#ifdef ADM_OPT_AVOID_ATAN
-    float ot_dp, o_mag_sq, t_mag_sq;
-#else
-    float oa, ta, diff;
-#endif
-    int angle_flag;
-    int i, j;
-
-    for (i = 0; i < h; ++i) {
-        for (j = 0; j < w; ++j) {
-            oh = ref->band_h[i * ref_px_stride + j];
-            ov = ref->band_v[i * ref_px_stride + j];
-            od = ref->band_d[i * ref_px_stride + j];
-            th = dis->band_h[i * dis_px_stride + j];
-            tv = dis->band_v[i * dis_px_stride + j];
-            td = dis->band_d[i * dis_px_stride + j];
-
-            kh = DIVS(th, oh + eps);
-            kv = DIVS(tv, ov + eps);
-            kd = DIVS(td, od + eps);
-
-            kh = kh < 0.0f ? 0.0f : (kh > 1.0f ? 1.0f : kh);
-            kv = kv < 0.0f ? 0.0f : (kv > 1.0f ? 1.0f : kv);
-            kd = kd < 0.0f ? 0.0f : (kd > 1.0f ? 1.0f : kd);
-
-            tmph = kh * oh;
-            tmpv = kv * ov;
-            tmpd = kd * od;
-#ifdef ADM_OPT_AVOID_ATAN
-            /* Determine if angle between (oh,ov) and (th,tv) is less than 1 degree.
-             * Given that u is the angle (oh,ov) and v is the angle (th,tv), this can
-             * be done by testing the inequvality.
-             *
-             * { (u.v.) >= 0 } AND { (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 }
-             *
-             * Proof:
-             *
-             * cos(theta) = (u.v) / (||u|| * ||v||)
-             *
-             * IF u.v >= 0 THEN
-             *   cos(theta)^2 = (u.v)^2 / (||u||^2 * ||v||^2)
-             *   (u.v)^2 = cos(theta)^2 * ||u||^2 * ||v||^2
-             *
-             *   IF |theta| < 1deg THEN
-             *     (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2
-             *   END
-             * ELSE
-             *   |theta| > 90deg
-             * END
-             */
-            ot_dp = oh * th + ov * tv;
-            o_mag_sq = oh * oh + ov * ov;
-            t_mag_sq = th * th + tv * tv;
-
-            angle_flag = (ot_dp >= 0.0f) && (ot_dp * ot_dp >= cos_1deg_sq * o_mag_sq * t_mag_sq);
-#else
-            oa = atanf(DIVS(ov, oh + eps));
-            ta = atanf(DIVS(tv, th + eps));
-
-            if (oh < 0.0f)
-                oa += (float)M_PI;
-            if (th < 0.0f)
-                ta += (float)M_PI;
-
-            diff = fabsf(oa - ta) * 180.0f / M_PI;
-            angle_flag = diff < 1.0f;
-#endif
-            if (angle_flag) {
-                tmph = th;
-                tmpv = tv;
-                tmpd = td;                
-            }
-
-            r->band_h[i * r_px_stride + j] = tmph;
-            r->band_v[i * r_px_stride + j] = tmpv;
-            r->band_d[i * r_px_stride + j] = tmpd;
-
-            a->band_h[i * a_px_stride + j] = th - tmph;
-            a->band_v[i * a_px_stride + j] = tv - tmpv;
-            a->band_d[i * a_px_stride + j] = td - tmpd;
-        }
-    }
-}
-#endif
 
-#if ADM_OPT_ENABLE
 void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride, double border_factor)
 {
 	const float *src_angles[3] = { src->band_h, src->band_v, src->band_d };
@@ -360,40 +257,7 @@ void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int ori
 		}
 	}
 }
-#else
-void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride)
-{
-    const float *src_angles[3] = { src->band_h, src->band_v, src->band_d };
-    float *dst_angles[3]       = { dst->band_h, dst->band_v, dst->band_d };
-
-    const float *src_ptr;
-    float *dst_ptr;
-
-    int src_px_stride = src_stride / sizeof(float);
-    int dst_px_stride = dst_stride / sizeof(float);
-
-    // for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from
-    // 1 to 4 (from finest scale to coarsest scale).
-    float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1);
-    float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2);
-    float rfactor[3] = {1.0f / factor1, 1.0f / factor1, 1.0f / factor2};
-
-    int i, j, theta;
-
-    for (theta = 0; theta < 3; ++theta) {
-        src_ptr = src_angles[theta];
-        dst_ptr = dst_angles[theta];
-
-        for (i = 0; i < h; ++i) {
-            for (j = 0; j < w; ++j) {
-                dst_ptr[i * dst_px_stride + j] = rfactor[theta] * src_ptr[i * src_px_stride + j];
-            }
-        }
-    }
-}
-#endif
 
-#if ADM_OPT_ENABLE
 /* Combination of adm_csf_s and adm_sum_cube_s for csf_o based den_scale */
 float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, int w, int h, int src_stride, double border_factor)
 {
@@ -454,9 +318,7 @@ float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, in
 	return(den_scale_h + den_scale_v + den_scale_d);
 
 }
-#endif
 
-#if ADM_OPT_ENABLE
 void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride)
 {
 	const float *angles[3] = { src->band_h, src->band_v, src->band_d };
@@ -521,62 +383,7 @@ void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int
 	}
 
 }
-#else
-void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride)
-{
-    const float *angles[3] = { src->band_h, src->band_v, src->band_d };
-    const float *src_ptr;
-
-    int src_px_stride = src_stride / sizeof(float);
-    int dst_px_stride = dst_stride / sizeof(float);
-
-    float fcoeff, imgcoeff;
-
-    int theta, i, j, fi, fj, ii, jj;
-
-    for (i = 0; i < h; ++i) {
-        /* Zero output row. */
-        for (j = 0; j < w; ++j) {
-            dst[i * dst_px_stride + j] = 0;
-        }
 
-        for (theta = 0; theta < 3; ++theta) {
-            src_ptr = angles[theta];
-
-            for (j = 0; j < w; ++j) {
-                float accum = 0;
-
-                /* Mean of three convolutions by [1 1 1; 1 2 1; 1 1 1]. */
-                for (fi = 0; fi < 3; ++fi) {
-                    for (fj = 0; fj < 3; ++fj) {
-                        fcoeff = (fi == 1 && fj == 1) ? 1.0f / 15.0f : 1.0f / 30.0f;
-
-                        ii = i - 1 + fi;
-                        jj = j - 1 + fj;
-
-                        /* Border handling by mirroring. */
-                        if (ii < 0)
-                            ii = -ii;
-                        else if (ii >= h)
-                            ii = 2 * h - ii - 1;
-                        if (jj < 0)
-                            jj = -jj;
-                        else if (jj >= w)
-                            jj = 2 * w - jj - 1;
-                        imgcoeff = fabsf(src_ptr[ii * src_px_stride + jj]);
-
-                        accum += fcoeff * imgcoeff;
-                    }
-                }
-
-                dst[i * dst_px_stride + j] += accum;
-            }
-        }
-    }
-}
-#endif
-
-#if ADM_OPT_ENABLE
 float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const adm_dwt_band_t_s *csf_a, int w, int h, int src_stride, int dst_stride, int csf_a_stride, double border_factor, int scale)
 {
 	/* Take decouple_r as src and do dsf_s on decouple_r here to get csf_r */
@@ -818,41 +625,7 @@ float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const a
 
 	return (num_scale_h + num_scale_v + num_scale_d);
 }
-#else
-void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const float *thresh, int w, int h, int src_stride, int dst_stride, int thresh_stride)
-{
-    int src_px_stride = src_stride / sizeof(float);
-    int dst_px_stride = dst_stride / sizeof(float);
-    int thresh_px_stride = thresh_stride / sizeof(float);
-
-    float xh, xv, xd, thr;
-
-    int i, j;
-
-    for (i = 0; i < h; ++i) {
-        for (j = 0; j < w; ++j) {
-            xh  = src->band_h[i * src_px_stride + j];
-            xv  = src->band_v[i * src_px_stride + j];
-            xd  = src->band_d[i * src_px_stride + j];
-            thr = thresh[i * thresh_px_stride + j];
-
-            xh = fabsf(xh) - thr;
-            xv = fabsf(xv) - thr;
-            xd = fabsf(xd) - thr;
-
-            xh = xh < 0.0f ? 0.0f : xh;
-            xv = xv < 0.0f ? 0.0f : xv;
-            xd = xd < 0.0f ? 0.0f : xd;
-
-            dst->band_h[i * dst_px_stride + j] = xh;
-            dst->band_v[i * dst_px_stride + j] = xv;
-            dst->band_d[i * dst_px_stride + j] = xd;
-        }
-    }
-}
-#endif
 
-#if ADM_OPT_ENABLE
 // This function stores the imgcoeff values used in adm_dwt2_s in buffers, which reduces the control code cycles.
 void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h)
 {
@@ -902,9 +675,6 @@ void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h)
 	}
 }
 
-#endif
-
-#if ADM_OPT_ENABLE
 void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride)
 {
 	const float *filter_lo = dwt2_db2_coeffs_lo_s;
@@ -997,109 +767,6 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int
 	aligned_free(tmplo);
 	aligned_free(tmphi);
 }
-#else
-void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int src_stride, int dst_stride)
-{
-    const float *filter_lo = dwt2_db2_coeffs_lo_s;
-    const float *filter_hi = dwt2_db2_coeffs_hi_s;
-    int fwidth = sizeof(dwt2_db2_coeffs_lo_s) / sizeof(float);
-
-    int src_px_stride = src_stride / sizeof(float);
-    int dst_px_stride = dst_stride / sizeof(float);
-
-    float *tmplo = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN);
-    float *tmphi = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN);
-    float fcoeff_lo, fcoeff_hi, imgcoeff;
-
-    int i, j, fi, fj, ii, jj;
-
-    for (i = 0; i < (h + 1) / 2; ++i) {
-        /* Vertical pass. */
-        for (j = 0; j < w; ++j) {
-            float accum_lo = 0;
-            float accum_hi = 0;
-
-            for (fi = 0; fi < fwidth; ++fi) {
-                fcoeff_lo = filter_lo[fi];
-                fcoeff_hi = filter_hi[fi];
-
-                /* Border handling by mirroring. */
-                ii = 2 * i - 1 + fi;
-
-                if (ii < 0)
-                    ii = -ii;
-                else if (ii >= h)
-                    ii = 2 * h - ii - 1;
-
-                imgcoeff = src[ii * src_px_stride + j];
-
-                accum_lo += fcoeff_lo * imgcoeff;
-                accum_hi += fcoeff_hi * imgcoeff;
-            }
-
-            tmplo[j] = accum_lo;
-            tmphi[j] = accum_hi;
-        }
-
-        /* Horizontal pass (lo). */
-        for (j = 0; j < (w + 1) / 2; ++j) {
-            float accum_lo = 0;
-            float accum_hi = 0;
-
-            for (fj = 0; fj < fwidth; ++fj) {
-                fcoeff_lo = filter_lo[fj];
-                fcoeff_hi = filter_hi[fj];
-
-                /* Border handling by mirroring. */
-                jj = 2 * j - 1 + fj;
-
-                if (jj < 0)
-                    jj = -jj;
-                else if (jj >= w)
-                    jj = 2 * w - jj - 1;
-
-                imgcoeff = tmplo[jj];
-
-                accum_lo += fcoeff_lo * imgcoeff;
-                accum_hi += fcoeff_hi * imgcoeff;
-            }
-
-            dst->band_a[i * dst_px_stride + j] = accum_lo;
-            dst->band_v[i * dst_px_stride + j] = accum_hi;
-        }
-
-        /* Horizontal pass (hi). */
-        for (j = 0; j < (w + 1) / 2; ++j) {
-            float accum_lo = 0;
-            float accum_hi = 0;
-
-            for (fj = 0; fj < fwidth; ++fj) {
-                fcoeff_lo = filter_lo[fj];
-                fcoeff_hi = filter_hi[fj];
-
-                /* Border handling by mirroring. */
-                jj = 2 * j - 1 + fj;
-
-                if (jj < 0)
-                    jj = -jj;
-                else if (jj >= w)
-                    jj = 2 * w - jj - 1;
-
-                imgcoeff = tmphi[jj];
-
-                accum_lo += fcoeff_lo * imgcoeff;
-                accum_hi += fcoeff_hi * imgcoeff;
-            }
-
-            dst->band_h[i * dst_px_stride + j] = accum_lo;
-            dst->band_d[i * dst_px_stride + j] = accum_hi;
-        }
-    }
-
-    aligned_free(tmplo);
-    aligned_free(tmphi);
-}
-#endif
 
 void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride)
 {
diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h
index a9c93f591..c95b1c47c 100644
--- a/feature/src/adm_tools.h
+++ b/feature/src/adm_tools.h
@@ -25,10 +25,7 @@
 #ifndef ADM_TOOLS_H_
 #define ADM_TOOLS_H_
 
-#define ADM_OPT_ENABLE 1
-
-#if ADM_OPT_ENABLE
-// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 
+// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1
 #define ADM_CM_THRESH_S_0_0(angles,src_px_stride,accum,w,h,i,j) \
 { \
 	*accum = 0; \
@@ -125,7 +122,6 @@
 	} \
 	*accum = sum; \
 }
-#endif
 
 typedef struct adm_dwt_band_t_s {
     float *band_a; /* Low-pass V + low-pass H. */
@@ -134,8 +130,6 @@ typedef struct adm_dwt_band_t_s {
     float *band_d; /* High-pass V + high-pass H. */
 } adm_dwt_band_t_s;
 
-#if ADM_OPT_ENABLE
-
 float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor);
 
 void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor);
@@ -168,8 +162,6 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int
 
 void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride);
 
-#endif
-
 /* ================= */
 /* Noise floor model */
 /* ================= */

From fb80c21392ff2d7d5544492bd26d8304ec597718 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 14:17:27 -0800
Subject: [PATCH 15/29] Remove buffer-related non-optimized code.

---
 feature/src/common/blur_array.c |  41 ---------
 feature/src/common/blur_array.h |  14 ---
 wrapper/src/combo.c             | 154 +-------------------------------
 wrapper/src/combo.h             |   2 -
 wrapper/src/darray.c            |   6 --
 5 files changed, 2 insertions(+), 215 deletions(-)

diff --git a/feature/src/common/blur_array.c b/feature/src/common/blur_array.c
index 0714e1a34..c4f53a224 100644
--- a/feature/src/common/blur_array.c
+++ b/feature/src/common/blur_array.c
@@ -21,9 +21,7 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a
     {
         arr->blur_buf_array[i].frame_idx = -1;
         arr->blur_buf_array[i].blur_buf = aligned_malloc(size, alignement);
-#if BUF_OPT_ENABLE
 		arr->blur_buf_array[i].reference_count	= 0;
-#endif
         if (arr->blur_buf_array[i].blur_buf == 0)
             return 0;
 
@@ -41,7 +39,6 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a
  */
 float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
 {
-#if BUF_OPT_ENABLE
     int array_length = arr->actual_length;
     BLUR_BUF_STRUCT* s = arr->blur_buf_array;
 	float *ret = NULL;
@@ -66,31 +63,6 @@ float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
     pthread_mutex_unlock(&arr->block);
 
     return ret;
-#else
-    // find item for the search_frame_idx
-    while (1)
-    {
-        pthread_mutex_lock(&arr->block);
-
-        int array_length = arr->actual_length;
-        BLUR_BUF_STRUCT* s = arr->blur_buf_array;
-
-        for (int i = 0; i < array_length; i++)
-        {
-            if (s->frame_idx == search_frame_idx)
-            {
-                pthread_mutex_unlock(&arr->block);
-                return s->blur_buf;
-            }
-
-            // next array item
-            s++;
-        }
-
-        pthread_mutex_unlock(&arr->block);
-    }
-#endif
-    return 0;
 }
 
 /*
@@ -127,11 +99,7 @@ int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf)
 /*
  * resets the slot in the array to -1 to indicate that the buffer can be used again
  */
-#if BUF_OPT_ENABLE
 int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx)
-#else
-int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
-#endif
 {
     int ret = 0;
     int array_length = arr->actual_length;
@@ -143,7 +111,6 @@ int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
     {
         if (s->frame_idx == search_frame_idx)
         {
-#if BUF_OPT_ENABLE
 			if(s->reference_count <= 0)
 			{
 				s->frame_idx = -1;
@@ -153,10 +120,6 @@ int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx)
 			{
 				ret = -1;
 			}
-#else
-            s->frame_idx = -1;
-            ret = 1;
-#endif			
             break;
         }
 
@@ -189,7 +152,6 @@ void free_blur_buf(BLUR_BUF_ARRAY* arr)
     pthread_mutex_destroy(&arr->block);
 }
 
-#if BUF_OPT_ENABLE
 /*
  * finds a free slot in the array, assigns the new frame index and returns the free buffer pointer
  * This increases the reference count for this slot
@@ -278,6 +240,3 @@ int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx)
 
     return ret;
 }
-
-
-#endif
diff --git a/feature/src/common/blur_array.h b/feature/src/common/blur_array.h
index 1209a590f..ddf38125b 100644
--- a/feature/src/common/blur_array.h
+++ b/feature/src/common/blur_array.h
@@ -12,12 +12,6 @@
 #include "pthread.h"
 #include "alloc.h"
 
-#ifdef MULTI_THREADING
-#define BUF_OPT_ENABLE 1
-#else
-#define BUF_OPT_ENABLE 0
-#endif
-
 #define MAX_NUM_THREADS 128
 typedef struct
 {
@@ -39,8 +33,6 @@ typedef struct
 
 int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t alignement);
 
-#if BUF_OPT_ENABLE
-
 float* get_free_blur_buf_slot(BLUR_BUF_ARRAY* arr, int frame_idx);
 
 int get_blur_buf_reference_count(BLUR_BUF_ARRAY* arr, int frame_idx);
@@ -49,12 +41,6 @@ int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx);
 
 int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx);
 
-#else
-
-int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx);
-
-#endif
-
 float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx);
 
 int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf);
diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c
index c96c451b9..d12526b9f 100644
--- a/wrapper/src/combo.c
+++ b/wrapper/src/combo.c
@@ -99,9 +99,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
     int ret = 0;
     bool next_frame_read;
 
-#if BUF_OPT_ENABLE		
     bool offset_flag;
-#endif
 
 #ifdef MULTI_THREADING
     float *prev_blur_buf_ = 0;
@@ -110,46 +108,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
     float *blur_buf_ = 0;
 #endif
 
-#if !BUF_OPT_ENABLE		
-    if (!(ref_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for ref_buf.\n");
-        goto fail_or_end;
-    }
-    if (!(next_ref_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for next_ref_buf.\n");
-        goto fail_or_end;
-    }
-
-    if (!(dis_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for dis_buf.\n");
-        goto fail_or_end;
-    }
-    if (!(next_dis_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for next_dis_buf.\n");
-        goto fail_or_end;
-    }
-
-    if (!(prev_blur_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for prev_blur_buf.\n");
-        goto fail_or_end;
-    }
-    if (!(blur_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for blur_buf.\n");
-        goto fail_or_end;
-    }
-    if (!(next_blur_buf = aligned_malloc(data_sz, MAX_ALIGN)))
-    {
-        sprintf(errmsg, "aligned_malloc failed for next_blur_buf.\n");
-        goto fail_or_end;
-    }
-#endif
-
     // use temp_buf for convolution_f32_c, and fread u and v
     if (!(temp_buf = aligned_malloc(data_sz * 2, MAX_ALIGN)))
     {
@@ -179,7 +137,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
 
         if (frm_idx == 0)
         {
-#if BUF_OPT_ENABLE				
             // Allocating the free buffers from buffer array
             blur_buf    = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx);
             ref_buf     = get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx);
@@ -194,7 +151,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
 #endif
                 goto fail_or_end;
             }
-#endif
 
             // read frame from file
 
@@ -231,16 +187,12 @@ void* combo_threadfunc(void* vmaf_thread_data)
             convolution_f32_c(FILTER_5, 5, ref_buf, blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float));
 
 #ifdef MULTI_THREADING
-#if !BUF_OPT_ENABLE
-            put_blur_buf(&thread_data->blur_buf_array, frm_idx, blur_buf);
-#endif
 #endif
 
         }
 #ifdef MULTI_THREADING
         else
         {
-#if BUF_OPT_ENABLE
             // retrieve from buffer array
             ref_buf     = get_blur_buf(&thread_data->ref_buf_array, frm_idx);
             dis_buf     = get_blur_buf(&thread_data->dis_buf_array, frm_idx);
@@ -255,25 +207,9 @@ void* combo_threadfunc(void* vmaf_thread_data)
 #endif
                 goto fail_or_end;
             }
-#else
-            // retrieve from buffer array
-
-            ref_buf_ = get_blur_buf(&thread_data->ref_buf_array, frm_idx);
-            memcpy(ref_buf, ref_buf_, data_sz);
-            release_blur_buf(&thread_data->ref_buf_array, frm_idx);
-
-            dis_buf_ = get_blur_buf(&thread_data->dis_buf_array, frm_idx);
-            memcpy(dis_buf, dis_buf_, data_sz);
-            release_blur_buf(&thread_data->dis_buf_array, frm_idx);
-
-            blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx);
-            memcpy(blur_buf, blur_buf_, data_sz);
-            // don't releave blur_buf_array of frm_idx yet, since it will be used by the next frame again
-#endif			
         }
 #endif
 
-#if BUF_OPT_ENABLE
         // Allocate free buffer from the buffer array for next frame index
         next_ref_buf 	= get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx + 1);
         next_dis_buf 	= get_free_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1);
@@ -286,7 +222,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
 #endif
             goto fail_or_end;
         }
-#endif
 
         ret = thread_data->read_frame(next_ref_buf, next_dis_buf, temp_buf, stride, user_data);
         if (ret == 1)
@@ -309,15 +244,8 @@ void* combo_threadfunc(void* vmaf_thread_data)
             next_frame_read = true;
         }
 
-#if !BUF_OPT_ENABLE		
-#ifdef MULTI_THREADING
-        pthread_mutex_unlock(&thread_data->mutex_readframe);
-#endif
-#endif
-
         if (next_frame_read)
         {
-#if BUF_OPT_ENABLE			
             next_blur_buf     = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx + 1);
             if(NULL == next_blur_buf)
             {
@@ -327,7 +255,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
 #endif
                 goto fail_or_end;
             }
-#endif
             // ===============================================================
             // offset pixel by OPT_RANGE_PIXEL_OFFSET
             // ===============================================================
@@ -342,23 +269,13 @@ void* combo_threadfunc(void* vmaf_thread_data)
             // ===============================================================
             convolution_f32_c(FILTER_5, 5, next_ref_buf, next_blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float));
 
-#if !BUF_OPT_ENABLE					
-#ifdef MULTI_THREADING
-            // save next_ref_buf, next_ref_buf and next_ref_buf to buffer array
-            put_blur_buf(&thread_data->ref_buf_array, frm_idx + 1, next_ref_buf);
-            put_blur_buf(&thread_data->dis_buf_array, frm_idx + 1, next_dis_buf);
-            put_blur_buf(&thread_data->blur_buf_array, frm_idx + 1, next_blur_buf);
-#endif
-#endif
         }
 
-#if BUF_OPT_ENABLE
         // release ref and dis buffer references after blur buf computation
         release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx + 1);
         release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx + 1);
 #ifdef MULTI_THREADING
         pthread_mutex_unlock(&thread_data->mutex_readframe);
-#endif
 #endif
         dbg_printf("frame: %d, ", frm_idx);
 
@@ -367,7 +284,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
         // step they have been offset by OPT_RANGE_PIXEL_OFFSET, now
         // offset them back.
         // ===============================================================
-#if BUF_OPT_ENABLE
         // offset back the buffers only if required
         if (frm_idx % n_subsample == 0 && ( (thread_data->psnr_array != NULL) || (thread_data->ssim_array != NULL) || (thread_data->ms_ssim_array != NULL) ))
         {
@@ -375,10 +291,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
             offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
             offset_flag = true;
 		}
-#else
-        offset_image(ref_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
-        offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride);
-#endif
         if (frm_idx % n_subsample == 0 && thread_data->psnr_array != NULL)
         {
             /* =========== psnr ============== */
@@ -427,17 +339,12 @@ void* combo_threadfunc(void* vmaf_thread_data)
         // ===============================================================
         // for the rest, offset pixel by OPT_RANGE_PIXEL_OFFSET
         // ===============================================================
-#if BUF_OPT_ENABLE
         if(offset_flag)
         {
             offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
             offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
             offset_flag = false;
 		}
-#else		
-        offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
-        offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
-#endif
 
         /* =========== adm ============== */
         if (frm_idx % n_subsample == 0)
@@ -519,7 +426,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
             else
             {
 #ifdef MULTI_THREADING
-#if BUF_OPT_ENABLE
                 // avoid multiple memory copies
                 prev_blur_buf = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
                 if(NULL == prev_blur_buf)
@@ -528,10 +434,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
                     sprintf(errmsg, "Data not available for prev_blur_buf.\n");
                     goto fail_or_end;
                 }
-#else				
-                prev_blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
-                memcpy(prev_blur_buf, prev_blur_buf_, data_sz);
-#endif
 #endif
                 if ((ret = compute_motion(prev_blur_buf, blur_buf, w, h, stride, stride, &score)))
                 {
@@ -539,11 +441,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
                     goto fail_or_end;
                 }
 #ifdef MULTI_THREADING
-#if BUF_OPT_ENABLE
                 release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx - 1);
-#else
-                release_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
-#endif
 #endif
 
                 if (next_frame_read)
@@ -559,9 +457,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
                 {
                     score2 = score;
 #ifdef MULTI_THREADING
-#if !BUF_OPT_ENABLE
-                    release_blur_buf(&thread_data->blur_buf_array, frm_idx); // no more next frames, release this one too
-#endif
 #endif
                 }
             }
@@ -573,27 +468,9 @@ void* combo_threadfunc(void* vmaf_thread_data)
             insert_array_at(thread_data->motion2_array, score2, frm_idx);
 
         }
-#if !BUF_OPT_ENABLE
-        else
-        {
-#ifdef MULTI_THREADING
-            if (frm_idx == 0) {}
-            else
-            {
-                release_blur_buf(&thread_data->blur_buf_array, frm_idx - 1);
-                if (next_frame_read) {}
-                else
-                {
-                    release_blur_buf(&thread_data->blur_buf_array, frm_idx); // no more next frames, release this one too
-                }
-            }
-#endif
-        }
-#else
         /* Indicate that motion score computation for this frame is complete */
         insert_array_at(thread_data->motion_score_compute_flag_array, 1.0, frm_idx);
         release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx + 1);
-#endif
 
         /* =========== vif ============== */
 
@@ -630,7 +507,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
 
         dbg_printf("\n");
 
-#if BUF_OPT_ENABLE
         //Release references to reference and distorted buffers
         release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx);
         release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx);
@@ -676,15 +552,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
             release_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1);
             release_blur_buf_slot(&thread_data->blur_buf_array, frm_idx);
         }
-#else	
-#ifndef MULTI_THREADING
-        // copy to prev_buf
-        memcpy(prev_blur_buf, blur_buf, data_sz);
-        memcpy(ref_buf, next_ref_buf, data_sz);
-        memcpy(dis_buf, next_dis_buf, data_sz);
-        memcpy(blur_buf, next_blur_buf, data_sz);
-#endif
-#endif
 
         if (!next_frame_read)
         {
@@ -698,15 +565,6 @@ void* combo_threadfunc(void* vmaf_thread_data)
 
 fail_or_end:
 
-#if !BUF_OPT_ENABLE
-    aligned_free(ref_buf);
-    aligned_free(dis_buf);
-    aligned_free(prev_blur_buf);
-    aligned_free(next_ref_buf);
-    aligned_free(next_dis_buf);
-    aligned_free(next_blur_buf);
-    aligned_free(blur_buf);
-#endif	
     aligned_free(temp_buf);
 
 #ifdef MULTI_THREADING
@@ -788,11 +646,9 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     combo_thread_data.stop_threads = 0;
     combo_thread_data.n_subsample = n_subsample;
 
-#if BUF_OPT_ENABLE
     DArray	motion_score_compute_flag_array;
     init_array(&motion_score_compute_flag_array, 1000);
     combo_thread_data.motion_score_compute_flag_array = &motion_score_compute_flag_array;
-#endif
 
     // sanity check for width/height
     if (w <= 0 || h <= 0 || (size_t)w > ALIGN_FLOOR(INT_MAX) / sizeof(float))
@@ -827,7 +683,7 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     }
 
     // for motion analysis we compare to previous buffer and next buffer
-#if BUF_OPT_ENABLE
+
     /*
      *	In the multi-thread mode, allocate a fixed size buffer pool for the reference, distorted and blur buffers.
      *	At any point, the no. of required ref and dis buffers is 1 more than the total no. of allotted threads,
@@ -838,11 +694,7 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN);
     init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN);
     init_blur_array(&combo_thread_data.blur_buf_array, 3 * (combo_thread_data.thread_count), combo_thread_data.data_sz, MAX_ALIGN);
-#else	
-    init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN);
-    init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN);
-    init_blur_array(&combo_thread_data.blur_buf_array, combo_thread_data.thread_count + 2, combo_thread_data.data_sz, MAX_ALIGN);
-#endif
+
     // initialize the mutex that protects the read_frame function
     pthread_mutex_init(&combo_thread_data.mutex_readframe, NULL);
 
@@ -881,9 +733,7 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
     free_blur_buf(&combo_thread_data.dis_buf_array);
     free_blur_buf(&combo_thread_data.blur_buf_array);
 
-#if BUF_OPT_ENABLE	
     free_array(&motion_score_compute_flag_array);
-#endif
 
     free(thread);
 
diff --git a/wrapper/src/combo.h b/wrapper/src/combo.h
index 1b335cc64..2c5473ec8 100644
--- a/wrapper/src/combo.h
+++ b/wrapper/src/combo.h
@@ -78,9 +78,7 @@ typedef struct
     BLUR_BUF_ARRAY blur_buf_array;
     BLUR_BUF_ARRAY ref_buf_array;
     BLUR_BUF_ARRAY dis_buf_array;
-#if BUF_OPT_ENABLE
 	DArray *motion_score_compute_flag_array;
-#endif
 #endif
     int ret;
 
diff --git a/wrapper/src/darray.c b/wrapper/src/darray.c
index a0fcf0a1e..90447b93e 100644
--- a/wrapper/src/darray.c
+++ b/wrapper/src/darray.c
@@ -23,9 +23,7 @@
 void init_array(DArray *a, size_t init_size)
 {
     a->array = (double *)malloc(init_size * sizeof(double));
-#if BUF_OPT_ENABLE	
 	memset(a->array, 0.0, init_size * sizeof(double));
-#endif
     a->used = 0;
     a->size = init_size;
 #ifdef MULTI_THREADING
@@ -41,12 +39,10 @@ void insert_array(DArray *a, double e)
     if (a->used == a->size)
     {
         a->size *= 2;
-#if BUF_OPT_ENABLE
 		double *temp;
 		temp = a->array;
 		temp += (a->size / 2);
 		memset(temp, 0.0, (a->size / 2) * sizeof(double));
-#endif
         a->array = (double *)realloc(a->array, a->size * sizeof(double));
     }
     a->array[a->used++] = e;
@@ -69,12 +65,10 @@ void insert_array_at(DArray *a, double e, int pos)
     {
         a->size *= 2;
         a->array = (double *)realloc(a->array, a->size * sizeof(double));
-#if BUF_OPT_ENABLE
 		double *temp;
 		temp = a->array;
 		temp += (a->size / 2);
 		memset(temp, 0.0, (a->size / 2) * sizeof(double));
-#endif
     }
     a->array[pos] = e;
 #ifdef MULTI_THREADING

From 4370c12d56436343fa8dd804a9d78e76b3f577e2 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 14:31:21 -0800
Subject: [PATCH 16/29] Remove VIF-related non-optimized code.

---
 feature/src/common/convolution.h          |  3 +-
 feature/src/common/convolution_avx.c      |  5 +-
 feature/src/common/convolution_internal.h |  3 +-
 feature/src/vif.c                         | 77 +----------------------
 feature/src/vif_options.h                 |  6 --
 feature/src/vif_tools.c                   | 64 +------------------
 feature/src/vif_tools.h                   |  2 -
 7 files changed, 8 insertions(+), 152 deletions(-)

diff --git a/feature/src/common/convolution.h b/feature/src/common/convolution.h
index df7990018..1fb6344e0 100644
--- a/feature/src/common/convolution.h
+++ b/feature/src/common/convolution.h
@@ -37,9 +37,8 @@ void convolution_f32_c_s(const float *filter, int filter_width, const float *src
 
 void convolution_f32_avx_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride);
 
-#if VIF_OPT_ENABLE
 void convolution_f32_avx_sq_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride);
 
 void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float *src1, const float *src2, float *dst, float *tmp, int width, int height, int src1_stride, int src2_stride, int dst_stride);
-#endif
+
 #endif // CONVOLUTION_H_
diff --git a/feature/src/common/convolution_avx.c b/feature/src/common/convolution_avx.c
index 44d22c504..c11d4540d 100644
--- a/feature/src/common/convolution_avx.c
+++ b/feature/src/common/convolution_avx.c
@@ -29,7 +29,6 @@ FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_5(const float * RES
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end);
 
-#if VIF_OPT_ENABLE
 FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end);
 FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end);
 FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end);
@@ -43,7 +42,7 @@ FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_17(const float *
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end);
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end);
 FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end);
-#endif
+
 FORCE_INLINE inline static void convolution_f32_avx_s_3x3_2d_scanline(const float * RESTRICT filter, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
 {
 	__m256 f00, f01, f02, f10, f11, f12, f20, f21, f22;
@@ -873,7 +872,6 @@ void convolution_f32_avx_s(const float *filter, int filter_width, const float *s
 	}
 }
 
-#if VIF_OPT_ENABLE
 // Filter a single scanline.
 FORCE_INLINE inline static void convolution_f32_avx_s_1d_h_sq_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
 {
@@ -2647,4 +2645,3 @@ void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float
 		break;
 	}
 }
-#endif
diff --git a/feature/src/common/convolution_internal.h b/feature/src/common/convolution_internal.h
index 1bf5a3863..7dac2d21a 100644
--- a/feature/src/common/convolution_internal.h
+++ b/feature/src/common/convolution_internal.h
@@ -51,7 +51,6 @@ FORCE_INLINE inline float convolution_edge_s(bool horizontal, const float *filte
 	return accum;
 }
 
-#if VIF_OPT_ENABLE 
 FORCE_INLINE inline float convolution_edge_sq_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j)
 {
 	int radius = filter_width / 2;
@@ -110,5 +109,5 @@ FORCE_INLINE inline float convolution_edge_xy_s(bool horizontal, const float *fi
 	}
 	return accum;
 }
-#endif
+
 #endif // CONVOLUTION_INTERNAL_H_
diff --git a/feature/src/vif.c b/feature/src/vif.c
index b3f13169b..65faa1968 100644
--- a/feature/src/vif.c
+++ b/feature/src/vif.c
@@ -42,10 +42,9 @@
 #define vif_statistic      vif_statistic_s
 #define offset_image       offset_image_s
 
-#if VIF_OPT_ENABLE
 #define vif_filter1d_sq    vif_filter1d_sq_s
 #define vif_filter1d_xy    vif_filter1d_xy_s
-#endif
+
 int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores)
 {
     float *data_buf = 0;
@@ -64,17 +63,8 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
     float *ref_dis_filt;
     float *tmpbuf;
 
-
-#if VIF_OPT_ENABLE
-    float *num_array;
-    float *den_array;
-#else
-	float *mu1_sq;
-	float *mu2_sq;
-	float *mu1_mu2;
     float *num_array;
     float *den_array;
-#endif
 
     /* Offset pointers to adjust for convolution border handling. */
     float *mu1_adj = 0;
@@ -106,8 +96,8 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
 
     int scale;
     int ret = 1;
-#if VIF_OPT_ENABLE
-	// Code optimized to save on multiple buffer copies 
+
+	// Code optimized to save on multiple buffer copies
 	// hence the reduction in the number of buffers required from 15 to 10 
 #define VIF_BUF_CNT 10	
 	if (SIZE_MAX / buf_sz_one < VIF_BUF_CNT)
@@ -136,41 +126,6 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
 	num_array    = (float *)data_top; data_top += buf_sz_one;
     den_array    = (float *)data_top; data_top += buf_sz_one;
 	tmpbuf = (float *)data_top; data_top += buf_sz_one;
-#else
-
-    if (SIZE_MAX / buf_sz_one < 15)
-    {
-        printf("error: SIZE_MAX / buf_sz_one < 15, buf_sz_one = %zu.\n", buf_sz_one);
-        fflush(stdout);
-        goto fail_or_end;
-    }
-
-    if (!(data_buf = aligned_malloc(buf_sz_one * 16, MAX_ALIGN)))
-    {
-        printf("error: aligned_malloc failed for data_buf.\n");
-        fflush(stdout);
-        goto fail_or_end;
-    }
-
-    data_top = (char *)data_buf;
-
-    ref_scale = (float *)data_top; data_top += buf_sz_one;
-    dis_scale = (float *)data_top; data_top += buf_sz_one;
-    ref_sq    = (float *)data_top; data_top += buf_sz_one;
-    dis_sq    = (float *)data_top; data_top += buf_sz_one;
-    ref_dis   = (float *)data_top; data_top += buf_sz_one;
-    mu1          = (float *)data_top; data_top += buf_sz_one;
-    mu2          = (float *)data_top; data_top += buf_sz_one;
-    mu1_sq       = (float *)data_top; data_top += buf_sz_one;
-    mu2_sq       = (float *)data_top; data_top += buf_sz_one;
-    mu1_mu2      = (float *)data_top; data_top += buf_sz_one;
-    ref_sq_filt  = (float *)data_top; data_top += buf_sz_one;
-    dis_sq_filt  = (float *)data_top; data_top += buf_sz_one;
-    ref_dis_filt = (float *)data_top; data_top += buf_sz_one;
-    num_array    = (float *)data_top; data_top += buf_sz_one;
-    den_array    = (float *)data_top; data_top += buf_sz_one;
-    tmpbuf    = (float *)data_top; data_top += buf_sz_one;
-#endif
 
     for (scale = 0; scale < 4; ++scale)
     {
@@ -237,36 +192,19 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
         vif_filter2d(filter, curr_ref_scale, mu1, w, h, curr_ref_stride, buf_stride, filter_width);
         vif_filter2d(filter, curr_dis_scale, mu2, w, h, curr_dis_stride, buf_stride, filter_width);
 #endif
-#if !VIF_OPT_ENABLE
-        vif_xx_yy_xy(mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride);
-
-        vif_xx_yy_xy(curr_ref_scale, curr_dis_scale, ref_sq, dis_sq, ref_dis, w, h, curr_ref_stride, curr_dis_stride, buf_stride, buf_stride, buf_stride);
-#endif
 #ifdef VIF_OPT_FILTER_1D
-#if VIF_OPT_ENABLE
-
 		// Code optimized by adding intrinsic code for the functions, 
 		// vif_filter1d_sq and vif_filter1d_sq
 		vif_filter1d_sq(filter, curr_ref_scale, ref_sq_filt, tmpbuf, w, h, curr_ref_stride, buf_stride, filter_width);
 		vif_filter1d_sq(filter, curr_dis_scale, dis_sq_filt, tmpbuf, w, h, curr_dis_stride, buf_stride, filter_width);
 		vif_filter1d_xy(filter, curr_ref_scale, curr_dis_scale, ref_dis_filt, tmpbuf, w, h, curr_ref_stride, curr_dis_stride, buf_stride, filter_width);
-#else
-        vif_filter1d(filter, ref_sq, ref_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width);
-        vif_filter1d(filter, dis_sq, dis_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width);
-        vif_filter1d(filter, ref_dis, ref_dis_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width);
-#endif
 #else
         vif_filter2d(filter, ref_sq, ref_sq_filt, w, h, buf_stride, buf_stride, filter_width);
         vif_filter2d(filter, dis_sq, dis_sq_filt, w, h, buf_stride, buf_stride, filter_width);
         vif_filter2d(filter, ref_dis, ref_dis_filt, w, h, buf_stride, buf_stride, filter_width);
 #endif
-#if VIF_OPT_ENABLE
 		vif_statistic(mu1, mu2, NULL, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array,
 			w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride);
-#else
-        vif_statistic(mu1_sq, mu2_sq, mu1_mu2, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array,
-                      w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride);
-#endif
         mu1_adj = ADJUST(mu1);
         mu2_adj = ADJUST(mu2);
 
@@ -276,10 +214,6 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
         ref_dis_filt_adj = ADJUST(ref_dis_filt);
 #endif
 
-#if !VIF_OPT_ENABLE
-        num_array_adj = ADJUST(num_array);
-        den_array_adj = ADJUST(den_array);
-#endif
 #undef ADJUST
 
 #ifdef VIF_OPT_DEBUG_DUMP
@@ -311,13 +245,8 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
         write_image(pathbuf, den_array_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float));
 #endif
 
-#if VIF_OPT_ENABLE
 		num = *num_array;
 		den = *den_array;
-#else
-        num = vif_sum(num_array_adj, buf_valid_w, buf_valid_h, buf_stride);
-        den = vif_sum(den_array_adj, buf_valid_w, buf_valid_h, buf_stride);
-#endif
 
         scores[2*scale] = num;
         scores[2*scale+1] = den;
diff --git a/feature/src/vif_options.h b/feature/src/vif_options.h
index 61fe2ae49..ef1f93b2e 100644
--- a/feature/src/vif_options.h
+++ b/feature/src/vif_options.h
@@ -36,10 +36,4 @@
 /* Whether to use a 1-D formulation of the Gaussian filter. */
 #define VIF_OPT_FILTER_1D
 
-/* VIF optimizations are enabled only for ID filter */ 
-#ifdef VIF_OPT_FILTER_1D
-#define VIF_OPT_ENABLE 1
-#else
-#define VIF_OPT_ENABLE 0
-#endif
 #endif /* VIF_OPTIONS_H_ */
diff --git a/feature/src/vif_tools.c b/feature/src/vif_tools.c
index 8f753fec5..6e66a3355 100644
--- a/feature/src/vif_tools.c
+++ b/feature/src/vif_tools.c
@@ -213,7 +213,6 @@ void vif_xx_yy_xy_s(const float *x, const float *y, float *xx, float *yy, float
     }
 }
 
-#if VIF_OPT_ENABLE
 void vif_statistic_s(const float *mu1, const float *mu2, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den,
 	int w, int h, int mu1_stride, int mu2_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride)
 {
@@ -282,64 +281,6 @@ void vif_statistic_s(const float *mu1, const float *mu2, const float *mu1_mu2, c
 	num[0] = accum_num;
 	den[0] = accum_den;
 }
-#else
-void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den,
-                     int w, int h, int mu1_sq_stride, int mu2_sq_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride)
-{
-    static const float sigma_nsq = 2;
-    static const float sigma_max_inv = 4.0/(255.0*255.0);
-
-    int mu1_sq_px_stride  = mu1_sq_stride / sizeof(float);
-    int mu2_sq_px_stride  = mu2_sq_stride / sizeof(float);
-    int mu1_mu2_px_stride = mu1_mu2_stride / sizeof(float);
-    int xx_filt_px_stride = xx_filt_stride / sizeof(float);
-    int yy_filt_px_stride = yy_filt_stride / sizeof(float);
-    int xy_filt_px_stride = xy_filt_stride / sizeof(float);
-    int num_px_stride = num_stride / sizeof(float);
-    int den_px_stride = den_stride / sizeof(float);
-
-    float mu1_sq_val, mu2_sq_val, mu1_mu2_val, xx_filt_val, yy_filt_val, xy_filt_val;
-    float sigma1_sq, sigma2_sq, sigma12, g, sv_sq;
-    float num_val, den_val;
-    int i, j;
-
-    for (i = 0; i < h; ++i) {
-        for (j = 0; j < w; ++j) {
-            mu1_sq_val  = mu1_sq[i * mu1_sq_px_stride + j]; // same name as the Matlab code vifp_mscale.m
-            mu2_sq_val  = mu2_sq[i * mu2_sq_px_stride + j];
-            mu1_mu2_val = mu1_mu2[i * mu1_mu2_px_stride + j];
-            xx_filt_val = xx_filt[i * xx_filt_px_stride + j];
-            yy_filt_val = yy_filt[i * yy_filt_px_stride + j];
-            xy_filt_val = xy_filt[i * xy_filt_px_stride + j];
-
-            sigma1_sq = xx_filt_val - mu1_sq_val;
-            sigma2_sq = yy_filt_val - mu2_sq_val;
-            sigma12   = xy_filt_val - mu1_mu2_val;
-
-            if (sigma1_sq < sigma_nsq) {
-                num_val = 1.0 - sigma2_sq*sigma_max_inv;
-                den_val = 1.0;
-            }
-            else {
-                    sv_sq = (sigma2_sq + sigma_nsq) * sigma1_sq;
-                                if( sigma12 < 0 )
-                                {
-                                    num_val = 0.0;
-                                }
-                                else
-                                {
-                        g = sv_sq - sigma12 * sigma12;
-                    num_val = log2f(sv_sq / g);
-                                }
-                den_val = log2f(1.0f + sigma1_sq / sigma_nsq);
-            }
-
-            num[i * num_px_stride + j] = num_val;
-            den[i * den_px_stride + j] = den_val;
-        }
-    }
-}
-#endif
 
 void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth)
 {
@@ -402,8 +343,8 @@ void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf,
 
     aligned_free(tmp);
 }
-#if	VIF_OPT_ENABLE
-// Code optimized by adding intrinsic code for the functions, 
+
+// Code optimized by adding intrinsic code for the functions,
 // vif_filter1d_sq and vif_filter1d_sq
 
 void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth)
@@ -531,7 +472,6 @@ void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, flo
 
 	aligned_free(tmp);
 }
-#endif
 
 void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth)
 {
diff --git a/feature/src/vif_tools.h b/feature/src/vif_tools.h
index d2a1bd97d..459b934f9 100644
--- a/feature/src/vif_tools.h
+++ b/feature/src/vif_tools.h
@@ -43,11 +43,9 @@ void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_
 
 void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth);
 
-#if VIF_OPT_ENABLE
 void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth);
 
 void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, float *dst, float *tmpbuf, int w, int h, int src1_stride, int src2_stride, int dst_stride, int fwidth);
-#endif
 
 void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth);
 

From c1626ff2734c1db40c38ca6fa5adf63282512efb Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 14:48:26 -0800
Subject: [PATCH 17/29] Fix: Remove adm related non-optimized code.

---
 feature/src/adm_tools.h | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h
index c95b1c47c..92d12d9b7 100644
--- a/feature/src/adm_tools.h
+++ b/feature/src/adm_tools.h
@@ -146,22 +146,6 @@ void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h);
 
 void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride);
 
-#else
-
-float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor);
-
-void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride);
-
-void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride);
-
-void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride);
-
-void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const float *thresh, int w, int h, int src_stride, int dst_stride, int thresh_stride);
-
-void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int src_stride, int dst_stride);
-
-void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride);
-
 /* ================= */
 /* Noise floor model */
 /* ================= */

From bf0bc4154cb770533fe4c0338adcd980370b435a Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 15:25:23 -0800
Subject: [PATCH 18/29] Update libvmaf.md

---
 resource/doc/libvmaf.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md
index 46c3cd82f..7a4e7edb5 100644
--- a/resource/doc/libvmaf.md
+++ b/resource/doc/libvmaf.md
@@ -41,7 +41,7 @@ To uninstall the library run:
 make uninstall
 ```
 
-### Use libvmaf with FFmpeg
+### Use `libvmaf.a` with FFmpeg
 
 After installing `libvmaf.a`, you can use it with FFmpeg. Under FFmpeg directory, configure, build and install FFmpeg with:
 

From a1cb186ff5930bf02406fa01f52e2f7e30b823df Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 15:52:16 -0800
Subject: [PATCH 19/29] Update version to 1.3.12; update CHANGELOG, README,
 VERSION, libvmaf.pc.

---
 CHANGELOG.md       | 7 +++++++
 README.md          | 5 ++---
 VERSION            | 2 +-
 wrapper/libvmaf.pc | 2 +-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f3ea5ead..9caef43a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Change Log
 
+## (1/31/2019) [1.3.12]
+
+**New features:**
+- Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version.
+- Printed out individual vmaf bootstrap scores in text file from `vmafossexec`.
+- refactored windows solution (#283) (#284) (#285) (#291)
+
 ## (12/17/2018) [1.3.11]
 
 **New features:**
diff --git a/README.md b/README.md
index e9f7b7d4d..b7212081d 100644
--- a/README.md
+++ b/README.md
@@ -2,16 +2,15 @@ VMAF - Video Multi-Method Assessment Fusion
 ===================
 [![Build Status](https://travis-ci.org/Netflix/vmaf.svg?branch=master)](https://travis-ci.org/Netflix/vmaf)
 
-VMAF is a perceptual video quality assessment algorithm developed by Netflix. VMAF Development Kit (VDK) is a software package that contains the VMAF algorithm implementation, as well as a set of tools that allows a user to train and test a custom VMAF model. For an overview, read [this](http://techblog.netflix.com/2016/06/toward-practical-perceptual-video.html) tech blog post, or [this](resource/doc/VMAF_ICIP17.pdf) slide deck.
+VMAF is a perceptual video quality assessment algorithm developed by Netflix. VMAF Development Kit (VDK) is a software package that contains the VMAF algorithm implementation, as well as a set of tools that allows a user to train and test a custom VMAF model. Read [this](https://medium.com/netflix-techblog/toward-a-practical-perceptual-video-quality-metric-653f208b9652) techblog post for an overview, or [this](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) post for the latest updates and tips for best practices.
 
 ## News
 
+- (1/31/19) Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version.
 - (11/19/18) Added a BD-rate calculator implementation. See more details [here](resource/doc/VMAF_Python_library.md#bd-rate-calculator).
 - (10/25/18) We have published our [second techblog on VMAF](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12), with recommendations on best practices.
 - (9/13/18) [SUREAL](https://github.com/Netflix/sureal) is no longer a submodule to VMAF.
 - (6/19/18) Each VMAF prediction score now comes with a 95% [confidence interval (CI)](resource/doc/conf_interval.md), which quantifies the level of confidence that the prediction lies within the interval.
-- (6/19/18) Added a [4K VMAF model](resource/doc/models.md/#predict-quality-on-a-4ktv-screen-at-15h) under `model/vmaf_4k_v0.6.1.pkl`, which predicts the subjective quality of video displayed on a 4KTV and viewed from the distance of 1.5X the display height.
-- (6/5/18) Speed optimization to [`vmafossexec`](resource/doc/vmafossexec.md): 1) support multi-threading (e.g. use `--thread 0` to use all cores), 2) support frame sampling (e.g. use `--subsample 5` to calculate VMAF on one of every 5 frames).
 
 ## Frequently Asked Questions
 
diff --git a/VERSION b/VERSION
index 488dcd99b..1bd4e672b 100644
--- a/VERSION
+++ b/VERSION
@@ -1,2 +1,2 @@
-VMAF Development Kit (VDK) Version 1.3.11
+VMAF Development Kit (VDK) Version 1.3.12
 VMAF Version 0.6.1
diff --git a/wrapper/libvmaf.pc b/wrapper/libvmaf.pc
index 6dee1512c..e11b8d0fb 100644
--- a/wrapper/libvmaf.pc
+++ b/wrapper/libvmaf.pc
@@ -5,7 +5,7 @@ includedir=/usr/local/include
 
 Name: libvmaf
 Description: Netflix's VMAF library
-Version: 1.3.11
+Version: 1.3.12
 URL: https://github.com/Netflix/vmaf
 Requires:
 Requires.private:

From 3772d0154fd4999a6c6c23ff7bbcb5ecea55e816 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 17:44:53 -0800
Subject: [PATCH 20/29] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9caef43a1..4ad285270 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@
 **New features:**
 - Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version.
 - Printed out individual vmaf bootstrap scores in text file from `vmafossexec`.
-- refactored windows solution (#283) (#284) (#285) (#291)
+- refactored windows solution (#283) (#284) (#285) (#291) (#298)
 
 ## (12/17/2018) [1.3.11]
 

From fbb9d3ecda8cc2bd80ecbdd63f877825216045be Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 31 Jan 2019 17:50:08 -0800
Subject: [PATCH 21/29] Update version to 1.3.13.

---
 CHANGELOG.md       | 4 ++--
 VERSION            | 2 +-
 wrapper/libvmaf.pc | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ad285270..a711deeb9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,11 @@
 # Change Log
 
-## (1/31/2019) [1.3.12]
+## (1/31/2019) [1.3.13]
 
 **New features:**
 - Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version.
 - Printed out individual vmaf bootstrap scores in text file from `vmafossexec`.
-- refactored windows solution (#283) (#284) (#285) (#291) (#298)
+- refactored windows solution (#283) (#284) (#285) (#291) (#298).
 
 ## (12/17/2018) [1.3.11]
 
diff --git a/VERSION b/VERSION
index 1bd4e672b..3df5d2895 100644
--- a/VERSION
+++ b/VERSION
@@ -1,2 +1,2 @@
-VMAF Development Kit (VDK) Version 1.3.12
+VMAF Development Kit (VDK) Version 1.3.13
 VMAF Version 0.6.1
diff --git a/wrapper/libvmaf.pc b/wrapper/libvmaf.pc
index e11b8d0fb..e39035c80 100644
--- a/wrapper/libvmaf.pc
+++ b/wrapper/libvmaf.pc
@@ -5,7 +5,7 @@ includedir=/usr/local/include
 
 Name: libvmaf
 Description: Netflix's VMAF library
-Version: 1.3.12
+Version: 1.3.13
 URL: https://github.com/Netflix/vmaf
 Requires:
 Requires.private:

From 4333cc2460562b99e10a05595aba8ecb07a874d8 Mon Sep 17 00:00:00 2001
From: Holy Wu <HolyWu@users.noreply.github.com>
Date: Sat, 2 Feb 2019 11:07:04 +0800
Subject: [PATCH 22/29] Update VMAFOSS_DOC_VERSION to 1.3.13

---
 wrapper/src/vmaf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index 5972d9c99..98ab6cc78 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -946,7 +946,7 @@ void BootstrapVmafQualityRunner::_set_prediction_result(
 
 }
 
-static const char VMAFOSS_DOC_VERSION[] = "1.3.11";
+static const char VMAFOSS_DOC_VERSION[] = "1.3.13";
 
 double RunVmaf(const char* fmt, int width, int height,
                int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data),

From f848a98073116f4e28531b60951df049b4ddc682 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Sat, 2 Feb 2019 15:00:59 -0800
Subject: [PATCH 23/29] Update year.

---
 CONTRIBUTING.md                             | 2 +-
 LICENSE                                     | 2 +-
 feature/src/adm.c                           | 2 +-
 feature/src/adm_tools.h                     | 2 +-
 feature/src/all.c                           | 2 +-
 feature/src/ansnr_tools.h                   | 2 +-
 feature/src/common/frame.c                  | 2 +-
 feature/src/common/frame.h                  | 2 +-
 feature/src/moment.c                        | 2 +-
 feature/src/moment_main.c                   | 2 +-
 feature/src/moment_options.h                | 2 +-
 feature/src/motion.c                        | 2 +-
 feature/src/motion_options.h                | 2 +-
 feature/src/ms_ssim.c                       | 2 +-
 feature/src/psnr.c                          | 2 +-
 feature/src/psnr_main.c                     | 2 +-
 feature/src/psnr_tools.c                    | 2 +-
 feature/src/psnr_tools.h                    | 2 +-
 feature/src/vif_options.h                   | 2 +-
 feature/src/vif_tools.c                     | 2 +-
 feature/src/vif_tools.h                     | 2 +-
 python/script/ffmpeg2vmaf.py                | 2 +-
 python/script/run_cleaning_cache.py         | 2 +-
 python/script/run_psnr.py                   | 2 +-
 python/script/run_testing.py                | 2 +-
 python/script/run_toddnoiseclassifier.py    | 2 +-
 python/script/run_vmaf.py                   | 2 +-
 python/script/run_vmaf_cross_validation.py  | 2 +-
 python/script/run_vmaf_in_batch.py          | 2 +-
 python/script/run_vmaf_training.py          | 2 +-
 python/src/vmaf/core/h5py_mixin.py          | 2 +-
 python/test/asset_test.py                   | 2 +-
 python/test/command_line_test.py            | 2 +-
 python/test/cross_validation_test.py        | 2 +-
 python/test/extra/command_line_extratest.py | 2 +-
 python/test/feature_assembler_test.py       | 2 +-
 python/test/feature_extractor_test.py       | 2 +-
 python/test/local_explainer_test.py         | 2 +-
 python/test/noref_feature_extractor_test.py | 2 +-
 python/test/perf_metric_test.py             | 2 +-
 python/test/quality_runner_test.py          | 2 +-
 python/test/raw_extractor_test.py           | 2 +-
 python/test/reader_test.py                  | 2 +-
 python/test/result_test.py                  | 2 +-
 python/test/routine_test.py                 | 2 +-
 python/test/testutil.py                     | 2 +-
 python/test/train_test_model_test.py        | 2 +-
 python/test/vmafossexec_test.py             | 2 +-
 48 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3f0c685f6..ad5cf954d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,7 +8,7 @@ By contributing your code, you agree to license your contribution under the term
 
 ```
 /**
- * Copyright 2016-2018 the original author or authors.
+ * Copyright 2016-2019 the original author or authors.
  * 
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/LICENSE b/LICENSE
index aa6790299..fd51098ac 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2016-2018 Netflix, Inc.
+   Copyright 2016-2019 Netflix, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/feature/src/adm.c b/feature/src/adm.c
index 8e9160b35..28ed2df1a 100644
--- a/feature/src/adm.c
+++ b/feature/src/adm.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h
index 92d12d9b7..386447513 100644
--- a/feature/src/adm_tools.h
+++ b/feature/src/adm_tools.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/all.c b/feature/src/all.c
index 71d8c358e..9dece9325 100644
--- a/feature/src/all.c
+++ b/feature/src/all.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ansnr_tools.h b/feature/src/ansnr_tools.h
index 32415b595..30cb52a59 100644
--- a/feature/src/ansnr_tools.h
+++ b/feature/src/ansnr_tools.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/frame.c b/feature/src/common/frame.c
index 1bdd87cec..fd5319430 100644
--- a/feature/src/common/frame.c
+++ b/feature/src/common/frame.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/frame.h b/feature/src/common/frame.h
index 33ec436a0..44e41bfd3 100644
--- a/feature/src/common/frame.h
+++ b/feature/src/common/frame.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/moment.c b/feature/src/moment.c
index 95268f9e8..63e31a649 100644
--- a/feature/src/moment.c
+++ b/feature/src/moment.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/moment_main.c b/feature/src/moment_main.c
index c201f21c6..e2d35e369 100644
--- a/feature/src/moment_main.c
+++ b/feature/src/moment_main.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/moment_options.h b/feature/src/moment_options.h
index 29d190d48..bc8a59dbb 100644
--- a/feature/src/moment_options.h
+++ b/feature/src/moment_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/motion.c b/feature/src/motion.c
index fa44d2def..17665ada8 100644
--- a/feature/src/motion.c
+++ b/feature/src/motion.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/motion_options.h b/feature/src/motion_options.h
index e17341d85..cf96903a0 100644
--- a/feature/src/motion_options.h
+++ b/feature/src/motion_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ms_ssim.c b/feature/src/ms_ssim.c
index c47aa01e9..a8cdf33bf 100644
--- a/feature/src/ms_ssim.c
+++ b/feature/src/ms_ssim.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/psnr.c b/feature/src/psnr.c
index 6804dc1f6..81a229251 100644
--- a/feature/src/psnr.c
+++ b/feature/src/psnr.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/psnr_main.c b/feature/src/psnr_main.c
index 32ba86255..f036d2022 100644
--- a/feature/src/psnr_main.c
+++ b/feature/src/psnr_main.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/psnr_tools.c b/feature/src/psnr_tools.c
index 26934eac6..0c3825555 100644
--- a/feature/src/psnr_tools.c
+++ b/feature/src/psnr_tools.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/psnr_tools.h b/feature/src/psnr_tools.h
index 8f480cf82..a60dec78a 100644
--- a/feature/src/psnr_tools.h
+++ b/feature/src/psnr_tools.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/vif_options.h b/feature/src/vif_options.h
index ef1f93b2e..30bea189c 100644
--- a/feature/src/vif_options.h
+++ b/feature/src/vif_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/vif_tools.c b/feature/src/vif_tools.c
index 6e66a3355..5b6913ee8 100644
--- a/feature/src/vif_tools.c
+++ b/feature/src/vif_tools.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/vif_tools.h b/feature/src/vif_tools.h
index 459b934f9..61f882685 100644
--- a/feature/src/vif_tools.h
+++ b/feature/src/vif_tools.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/python/script/ffmpeg2vmaf.py b/python/script/ffmpeg2vmaf.py
index 43091dc9b..f302a80a5 100755
--- a/python/script/ffmpeg2vmaf.py
+++ b/python/script/ffmpeg2vmaf.py
@@ -15,7 +15,7 @@
     cmd_option_exists
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le']
diff --git a/python/script/run_cleaning_cache.py b/python/script/run_cleaning_cache.py
index cd4a5b66c..a20f5dd29 100755
--- a/python/script/run_cleaning_cache.py
+++ b/python/script/run_cleaning_cache.py
@@ -6,7 +6,7 @@
 from vmaf.routine import run_remove_results_for_dataset
 from vmaf.tools.misc import import_python_file
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/script/run_psnr.py b/python/script/run_psnr.py
index 1a5082cc0..d93f59f33 100755
--- a/python/script/run_psnr.py
+++ b/python/script/run_psnr.py
@@ -12,7 +12,7 @@
 from vmaf.tools.misc import get_cmd_option
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le']
diff --git a/python/script/run_testing.py b/python/script/run_testing.py
index 7ae2621dc..5e9e6fb9f 100755
--- a/python/script/run_testing.py
+++ b/python/script/run_testing.py
@@ -16,7 +16,7 @@
 from vmaf.routine import run_test_on_dataset, print_matplotlib_warning
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 POOL_METHODS = ['mean', 'harmonic_mean', 'min', 'median', 'perc5', 'perc10', 'perc20']
diff --git a/python/script/run_toddnoiseclassifier.py b/python/script/run_toddnoiseclassifier.py
index 23308e7b8..19fc0d0de 100644
--- a/python/script/run_toddnoiseclassifier.py
+++ b/python/script/run_toddnoiseclassifier.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import os
diff --git a/python/script/run_vmaf.py b/python/script/run_vmaf.py
index 99d20c85a..75c116846 100755
--- a/python/script/run_vmaf.py
+++ b/python/script/run_vmaf.py
@@ -15,7 +15,7 @@
     cmd_option_exists
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le']
diff --git a/python/script/run_vmaf_cross_validation.py b/python/script/run_vmaf_cross_validation.py
index e261fba64..cbb51495d 100644
--- a/python/script/run_vmaf_cross_validation.py
+++ b/python/script/run_vmaf_cross_validation.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import matplotlib.pyplot as plt
diff --git a/python/script/run_vmaf_in_batch.py b/python/script/run_vmaf_in_batch.py
index 803216b53..992ae0325 100755
--- a/python/script/run_vmaf_in_batch.py
+++ b/python/script/run_vmaf_in_batch.py
@@ -13,7 +13,7 @@
 from vmaf.tools.misc import cmd_option_exists, get_cmd_option
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le']
diff --git a/python/script/run_vmaf_training.py b/python/script/run_vmaf_training.py
index 320c15a24..651b54be8 100755
--- a/python/script/run_vmaf_training.py
+++ b/python/script/run_vmaf_training.py
@@ -14,7 +14,7 @@
 from vmaf.routine import print_matplotlib_warning, train_test_vmaf_on_dataset
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 POOL_METHODS = ['mean', 'harmonic_mean', 'min', 'median', 'perc5', 'perc10', 'perc20']
diff --git a/python/src/vmaf/core/h5py_mixin.py b/python/src/vmaf/core/h5py_mixin.py
index efa3f4619..659fee9bb 100644
--- a/python/src/vmaf/core/h5py_mixin.py
+++ b/python/src/vmaf/core/h5py_mixin.py
@@ -1,6 +1,6 @@
 import h5py
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/test/asset_test.py b/python/test/asset_test.py
index 6880cee33..11cadb0ee 100644
--- a/python/test/asset_test.py
+++ b/python/test/asset_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/python/test/command_line_test.py b/python/test/command_line_test.py
index f3ba4667a..27a5639da 100644
--- a/python/test/command_line_test.py
+++ b/python/test/command_line_test.py
@@ -5,7 +5,7 @@
 from vmaf.config import VmafConfig
 from vmaf.tools.misc import run_process
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 class CommandLineTest(unittest.TestCase):
diff --git a/python/test/cross_validation_test.py b/python/test/cross_validation_test.py
index 831d00834..2c2005a44 100644
--- a/python/test/cross_validation_test.py
+++ b/python/test/cross_validation_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/python/test/extra/command_line_extratest.py b/python/test/extra/command_line_extratest.py
index 64fee4a49..362edbf3d 100644
--- a/python/test/extra/command_line_extratest.py
+++ b/python/test/extra/command_line_extratest.py
@@ -5,7 +5,7 @@
 from vmaf import run_process
 from vmaf.config import VmafConfig
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 class CommandLineTest(unittest.TestCase):
diff --git a/python/test/feature_assembler_test.py b/python/test/feature_assembler_test.py
index 2c4e490d7..0c552291f 100644
--- a/python/test/feature_assembler_test.py
+++ b/python/test/feature_assembler_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/python/test/feature_extractor_test.py b/python/test/feature_extractor_test.py
index 2a4ec6c65..921ad8696 100644
--- a/python/test/feature_extractor_test.py
+++ b/python/test/feature_extractor_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import os
diff --git a/python/test/local_explainer_test.py b/python/test/local_explainer_test.py
index 7076f4ddb..abb305919 100644
--- a/python/test/local_explainer_test.py
+++ b/python/test/local_explainer_test.py
@@ -16,7 +16,7 @@
 from vmaf.routine import read_dataset
 from vmaf.tools.misc import import_python_file
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/test/noref_feature_extractor_test.py b/python/test/noref_feature_extractor_test.py
index c6ab47bcb..285017f12 100644
--- a/python/test/noref_feature_extractor_test.py
+++ b/python/test/noref_feature_extractor_test.py
@@ -1,6 +1,6 @@
 from vmaf.core.executor import run_executors_in_parallel
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/python/test/perf_metric_test.py b/python/test/perf_metric_test.py
index ff88008d2..836cea6ee 100644
--- a/python/test/perf_metric_test.py
+++ b/python/test/perf_metric_test.py
@@ -7,7 +7,7 @@
 from vmaf.core.perf_metric import RmsePerfMetric, SrccPerfMetric, PccPerfMetric, \
     KendallPerfMetric, AucPerfMetric, ResolvingPowerPerfMetric
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 class AggrScorePerfMetricTest(unittest.TestCase):
diff --git a/python/test/quality_runner_test.py b/python/test/quality_runner_test.py
index 9268da547..f42e79c91 100644
--- a/python/test/quality_runner_test.py
+++ b/python/test/quality_runner_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import os
diff --git a/python/test/raw_extractor_test.py b/python/test/raw_extractor_test.py
index 82b857dc2..d9fa77e1d 100644
--- a/python/test/raw_extractor_test.py
+++ b/python/test/raw_extractor_test.py
@@ -1,6 +1,6 @@
 from vmaf.core.executor import run_executors_in_parallel
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/python/test/reader_test.py b/python/test/reader_test.py
index c0ed3fba3..3510e0a6d 100644
--- a/python/test/reader_test.py
+++ b/python/test/reader_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/python/test/result_test.py b/python/test/result_test.py
index 8fa97f9bf..7ca9ea5d7 100644
--- a/python/test/result_test.py
+++ b/python/test/result_test.py
@@ -1,6 +1,6 @@
 from testutil import set_default_576_324_videos_for_testing
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import json
diff --git a/python/test/routine_test.py b/python/test/routine_test.py
index 111d1e055..fcf31a8df 100644
--- a/python/test/routine_test.py
+++ b/python/test/routine_test.py
@@ -8,7 +8,7 @@
 from vmaf.core.quality_runner import VmafQualityRunner, BootstrapVmafQualityRunner
 from sureal.subjective_model import MosModel
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/test/testutil.py b/python/test/testutil.py
index d665c02d2..980103f04 100644
--- a/python/test/testutil.py
+++ b/python/test/testutil.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 from vmaf.config import VmafConfig
diff --git a/python/test/train_test_model_test.py b/python/test/train_test_model_test.py
index 9192bfeb4..39417d051 100644
--- a/python/test/train_test_model_test.py
+++ b/python/test/train_test_model_test.py
@@ -13,7 +13,7 @@
 from vmaf.tools.misc import import_python_file
 from vmaf.core.raw_extractor import DisYUVRawVideoExtractor
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 class TrainTestModelTest(unittest.TestCase):
diff --git a/python/test/vmafossexec_test.py b/python/test/vmafossexec_test.py
index c06860c55..5d558df78 100644
--- a/python/test/vmafossexec_test.py
+++ b/python/test/vmafossexec_test.py
@@ -7,7 +7,7 @@
 
 from testutil import set_default_576_324_videos_for_testing
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 

From 7ec310955343a7ae4c6180cdb93b7eb51911cabf Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Sat, 2 Feb 2019 15:12:23 -0800
Subject: [PATCH 24/29] Update year.

---
 feature/src/adm_options.h                        | 2 +-
 feature/src/adm_tools.c                          | 2 +-
 feature/src/all_options.h                        | 2 +-
 feature/src/ansnr.c                              | 2 +-
 feature/src/ansnr_options.h                      | 2 +-
 feature/src/ansnr_tools.c                        | 2 +-
 feature/src/common/alignment.c                   | 2 +-
 feature/src/common/alignment.h                   | 2 +-
 feature/src/common/alloc.c                       | 2 +-
 feature/src/common/alloc.h                       | 2 +-
 feature/src/common/convolution.c                 | 2 +-
 feature/src/common/convolution.h                 | 2 +-
 feature/src/common/convolution_avx.c             | 2 +-
 feature/src/common/convolution_internal.h        | 2 +-
 feature/src/common/cpu.c                         | 2 +-
 feature/src/common/cpu.h                         | 2 +-
 feature/src/common/file_io.c                     | 2 +-
 feature/src/common/file_io.h                     | 2 +-
 feature/src/common/macros.h                      | 2 +-
 feature/src/iqa/iqa_options.h                    | 2 +-
 feature/src/motion_tools.h                       | 2 +-
 feature/src/ms_ssim_main.c                       | 2 +-
 feature/src/psnr_options.h                       | 2 +-
 feature/src/ssim.c                               | 2 +-
 feature/src/ssim_main.c                          | 2 +-
 feature/src/vif.c                                | 2 +-
 feature/src/vmaf_main.c                          | 2 +-
 python/script/run_result_assembly.py             | 2 +-
 python/script/run_vmafossexec_subsampling.py     | 2 +-
 python/src/vmaf/core/matlab_feature_extractor.py | 2 +-
 python/src/vmaf/core/matlab_quality_runner.py    | 2 +-
 python/src/vmaf/core/niqe_train_test_model.py    | 2 +-
 python/test/bootstrap_train_test_model_test.py   | 2 +-
 python/test/executor_test.py                     | 2 +-
 python/test/extra/testutil.py                    | 2 +-
 python/test/niqe_train_test_model_test.py        | 2 +-
 wrapper/src/combo.c                              | 2 +-
 wrapper/src/combo.h                              | 2 +-
 wrapper/src/darray.c                             | 2 +-
 wrapper/src/darray.h                             | 2 +-
 wrapper/src/debug.h                              | 2 +-
 wrapper/src/libvmaf.cpp                          | 2 +-
 wrapper/src/libvmaf.h                            | 2 +-
 wrapper/src/main.cpp                             | 2 +-
 wrapper/src/timer.h                              | 2 +-
 wrapper/src/vmaf.cpp                             | 2 +-
 wrapper/src/vmaf.h                               | 2 +-
 47 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/feature/src/adm_options.h b/feature/src/adm_options.h
index 72f8fa7c4..952add1f8 100644
--- a/feature/src/adm_options.h
+++ b/feature/src/adm_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/adm_tools.c b/feature/src/adm_tools.c
index d5578c706..d92428b36 100644
--- a/feature/src/adm_tools.c
+++ b/feature/src/adm_tools.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/all_options.h b/feature/src/all_options.h
index 947182ca0..11c25f3b2 100644
--- a/feature/src/all_options.h
+++ b/feature/src/all_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ansnr.c b/feature/src/ansnr.c
index 9557da5cd..1befd7174 100644
--- a/feature/src/ansnr.c
+++ b/feature/src/ansnr.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ansnr_options.h b/feature/src/ansnr_options.h
index af3b5c099..b2c0908ed 100644
--- a/feature/src/ansnr_options.h
+++ b/feature/src/ansnr_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ansnr_tools.c b/feature/src/ansnr_tools.c
index 2bb150b2d..d74c310e3 100644
--- a/feature/src/ansnr_tools.c
+++ b/feature/src/ansnr_tools.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/alignment.c b/feature/src/common/alignment.c
index eade10ec8..4bb520585 100644
--- a/feature/src/common/alignment.c
+++ b/feature/src/common/alignment.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/alignment.h b/feature/src/common/alignment.h
index 7aa6dea81..f57da04e7 100644
--- a/feature/src/common/alignment.h
+++ b/feature/src/common/alignment.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/alloc.c b/feature/src/common/alloc.c
index 25879a111..e7e6754d7 100644
--- a/feature/src/common/alloc.c
+++ b/feature/src/common/alloc.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/alloc.h b/feature/src/common/alloc.h
index b4c07f3e9..228d87544 100644
--- a/feature/src/common/alloc.h
+++ b/feature/src/common/alloc.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/convolution.c b/feature/src/common/convolution.c
index dcea7b4ea..e93d1ff2a 100644
--- a/feature/src/common/convolution.c
+++ b/feature/src/common/convolution.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/convolution.h b/feature/src/common/convolution.h
index 1fb6344e0..d1840b0d2 100644
--- a/feature/src/common/convolution.h
+++ b/feature/src/common/convolution.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/convolution_avx.c b/feature/src/common/convolution_avx.c
index c11d4540d..a066de691 100644
--- a/feature/src/common/convolution_avx.c
+++ b/feature/src/common/convolution_avx.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/convolution_internal.h b/feature/src/common/convolution_internal.h
index 7dac2d21a..b8a9e2564 100644
--- a/feature/src/common/convolution_internal.h
+++ b/feature/src/common/convolution_internal.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/cpu.c b/feature/src/common/cpu.c
index 4befea251..85156f804 100644
--- a/feature/src/common/cpu.c
+++ b/feature/src/common/cpu.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/cpu.h b/feature/src/common/cpu.h
index 9999bdc2a..1225bbb0b 100644
--- a/feature/src/common/cpu.h
+++ b/feature/src/common/cpu.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/file_io.c b/feature/src/common/file_io.c
index b51a4ba60..d8ebfbd1f 100644
--- a/feature/src/common/file_io.c
+++ b/feature/src/common/file_io.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/file_io.h b/feature/src/common/file_io.h
index 26ddcfbce..4ecbec900 100644
--- a/feature/src/common/file_io.h
+++ b/feature/src/common/file_io.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/common/macros.h b/feature/src/common/macros.h
index 565c693e2..989cb6cec 100644
--- a/feature/src/common/macros.h
+++ b/feature/src/common/macros.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/iqa/iqa_options.h b/feature/src/iqa/iqa_options.h
index 907612354..c1d609717 100644
--- a/feature/src/iqa/iqa_options.h
+++ b/feature/src/iqa/iqa_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/motion_tools.h b/feature/src/motion_tools.h
index cd8db5857..138bcb4b1 100644
--- a/feature/src/motion_tools.h
+++ b/feature/src/motion_tools.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ms_ssim_main.c b/feature/src/ms_ssim_main.c
index 22a16ca73..02eac271a 100644
--- a/feature/src/ms_ssim_main.c
+++ b/feature/src/ms_ssim_main.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/psnr_options.h b/feature/src/psnr_options.h
index 8693bcd84..5b7b0f2e3 100644
--- a/feature/src/psnr_options.h
+++ b/feature/src/psnr_options.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ssim.c b/feature/src/ssim.c
index 2db950ac5..e3e6d6d2a 100644
--- a/feature/src/ssim.c
+++ b/feature/src/ssim.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/ssim_main.c b/feature/src/ssim_main.c
index 72a9b3869..a3fb4c143 100644
--- a/feature/src/ssim_main.c
+++ b/feature/src/ssim_main.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/vif.c b/feature/src/vif.c
index 65faa1968..91d844d60 100644
--- a/feature/src/vif.c
+++ b/feature/src/vif.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/feature/src/vmaf_main.c b/feature/src/vmaf_main.c
index 920cc9794..7a5489405 100644
--- a/feature/src/vmaf_main.c
+++ b/feature/src/vmaf_main.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/python/script/run_result_assembly.py b/python/script/run_result_assembly.py
index cbd33d767..015f76484 100644
--- a/python/script/run_result_assembly.py
+++ b/python/script/run_result_assembly.py
@@ -8,7 +8,7 @@
 
 from vmaf.core.result import Result
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/script/run_vmafossexec_subsampling.py b/python/script/run_vmafossexec_subsampling.py
index 36f48554b..ff6a2a3b9 100644
--- a/python/script/run_vmafossexec_subsampling.py
+++ b/python/script/run_vmafossexec_subsampling.py
@@ -11,7 +11,7 @@
 from vmaf.tools.decorator import persist_to_dir
 from vmaf.tools.misc import import_python_file
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/src/vmaf/core/matlab_feature_extractor.py b/python/src/vmaf/core/matlab_feature_extractor.py
index 480172e60..1160d7656 100644
--- a/python/src/vmaf/core/matlab_feature_extractor.py
+++ b/python/src/vmaf/core/matlab_feature_extractor.py
@@ -6,7 +6,7 @@
 from vmaf.tools.misc import make_absolute_path, run_process
 from vmaf.tools.stats import ListStats
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/src/vmaf/core/matlab_quality_runner.py b/python/src/vmaf/core/matlab_quality_runner.py
index 63acec42a..1902478a4 100644
--- a/python/src/vmaf/core/matlab_quality_runner.py
+++ b/python/src/vmaf/core/matlab_quality_runner.py
@@ -7,7 +7,7 @@
 from vmaf.core.quality_runner import QualityRunner
 from vmaf.core.result import Result
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/src/vmaf/core/niqe_train_test_model.py b/python/src/vmaf/core/niqe_train_test_model.py
index f766a56bb..45576537e 100644
--- a/python/src/vmaf/core/niqe_train_test_model.py
+++ b/python/src/vmaf/core/niqe_train_test_model.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import numpy as np
diff --git a/python/test/bootstrap_train_test_model_test.py b/python/test/bootstrap_train_test_model_test.py
index 8e092af1e..9b76937d8 100644
--- a/python/test/bootstrap_train_test_model_test.py
+++ b/python/test/bootstrap_train_test_model_test.py
@@ -10,7 +10,7 @@
     BootstrapSklearnRandomForestTrainTestModel, ResidueBootstrapLibsvmNusvrTrainTestModel, \
     ResidueBootstrapRandomForestTrainTestModel
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 
diff --git a/python/test/executor_test.py b/python/test/executor_test.py
index 32f3181ae..fb36a4689 100644
--- a/python/test/executor_test.py
+++ b/python/test/executor_test.py
@@ -2,7 +2,7 @@
 from vmaf.core.asset import Asset
 from vmaf.core.executor import Executor
 
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 class ExecutorTest(unittest.TestCase):
diff --git a/python/test/extra/testutil.py b/python/test/extra/testutil.py
index d665c02d2..980103f04 100644
--- a/python/test/extra/testutil.py
+++ b/python/test/extra/testutil.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 from vmaf.config import VmafConfig
diff --git a/python/test/niqe_train_test_model_test.py b/python/test/niqe_train_test_model_test.py
index 5458591bc..f8841c0c6 100644
--- a/python/test/niqe_train_test_model_test.py
+++ b/python/test/niqe_train_test_model_test.py
@@ -1,4 +1,4 @@
-__copyright__ = "Copyright 2016-2018, Netflix, Inc."
+__copyright__ = "Copyright 2016-2019, Netflix, Inc."
 __license__ = "Apache, Version 2.0"
 
 import unittest
diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c
index d12526b9f..a1d8c940b 100644
--- a/wrapper/src/combo.c
+++ b/wrapper/src/combo.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/combo.h b/wrapper/src/combo.h
index 2c5473ec8..8a54fc7c7 100644
--- a/wrapper/src/combo.h
+++ b/wrapper/src/combo.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/darray.c b/wrapper/src/darray.c
index 90447b93e..c0eadee8c 100644
--- a/wrapper/src/darray.c
+++ b/wrapper/src/darray.c
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/darray.h b/wrapper/src/darray.h
index ba8c1eae6..de161bc84 100644
--- a/wrapper/src/darray.h
+++ b/wrapper/src/darray.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/debug.h b/wrapper/src/debug.h
index 395a3617c..cdf8e4ba3 100644
--- a/wrapper/src/debug.h
+++ b/wrapper/src/debug.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/libvmaf.cpp b/wrapper/src/libvmaf.cpp
index e8e09de30..2f2572b59 100644
--- a/wrapper/src/libvmaf.cpp
+++ b/wrapper/src/libvmaf.cpp
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/libvmaf.h b/wrapper/src/libvmaf.h
index 71c82f69e..a3b4145b2 100644
--- a/wrapper/src/libvmaf.h
+++ b/wrapper/src/libvmaf.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/main.cpp b/wrapper/src/main.cpp
index 7c714d331..ead8b21c5 100644
--- a/wrapper/src/main.cpp
+++ b/wrapper/src/main.cpp
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/timer.h b/wrapper/src/timer.h
index 2f6756af1..d7d005933 100644
--- a/wrapper/src/timer.h
+++ b/wrapper/src/timer.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index 98ab6cc78..963433faf 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.
diff --git a/wrapper/src/vmaf.h b/wrapper/src/vmaf.h
index 9a710052c..0bbf882ea 100644
--- a/wrapper/src/vmaf.h
+++ b/wrapper/src/vmaf.h
@@ -1,6 +1,6 @@
 /**
  *
- *  Copyright 2016-2018 Netflix, Inc.
+ *  Copyright 2016-2019 Netflix, Inc.
  *
  *     Licensed under the Apache License, Version 2.0 (the "License");
  *     you may not use this file except in compliance with the License.

From 02e95ba523fbef0ada7006d0d47b9b50cd16a443 Mon Sep 17 00:00:00 2001
From: kjerbi <khaled_jerbi@yahoo.fr>
Date: Thu, 7 Feb 2019 09:58:41 -0800
Subject: [PATCH 25/29] fix w10 error with using uninitialized offset_flag
 variable

---
 wrapper/src/combo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c
index a1d8c940b..e13458672 100644
--- a/wrapper/src/combo.c
+++ b/wrapper/src/combo.c
@@ -99,7 +99,7 @@ void* combo_threadfunc(void* vmaf_thread_data)
     int ret = 0;
     bool next_frame_read;
 
-    bool offset_flag;
+    bool offset_flag = false;
 
 #ifdef MULTI_THREADING
     float *prev_blur_buf_ = 0;

From 69593bf5d79240f51709f290010980ed120a7071 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Thu, 7 Feb 2019 21:41:58 -0800
Subject: [PATCH 26/29] Update libvmaf.md

---
 resource/doc/libvmaf.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md
index 7a4e7edb5..7f1dccab8 100644
--- a/resource/doc/libvmaf.md
+++ b/resource/doc/libvmaf.md
@@ -59,4 +59,4 @@ ffmpeg -i main.mpg -i ref.mpg -filter_complex \
 
 Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods).
 
-See the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms.
+See the [libvmaf](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for FFmpeg's guide to libvmaf, the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms.

From cd3f5bb9dba29047be05ea2e4e672f9f44f0fe48 Mon Sep 17 00:00:00 2001
From: Zhi Li <zli@netflix.com>
Date: Fri, 8 Feb 2019 08:48:24 -0800
Subject: [PATCH 27/29] Update libvmaf.md

---
 resource/doc/libvmaf.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md
index 7f1dccab8..81abb89b9 100644
--- a/resource/doc/libvmaf.md
+++ b/resource/doc/libvmaf.md
@@ -59,4 +59,4 @@ ffmpeg -i main.mpg -i ref.mpg -filter_complex \
 
 Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods).
 
-See the [libvmaf](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for FFmpeg's guide to libvmaf, the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms.
+See the [FFmpeg's guide to libvmaf](https://ffmpeg.org/ffmpeg-filters.html#libvmaf), the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms.

From a538d7cf4524dbef321c04a845704152541eef3a Mon Sep 17 00:00:00 2001
From: Holy Wu <HolyWu@users.noreply.github.com>
Date: Tue, 12 Feb 2019 13:00:07 +0800
Subject: [PATCH 28/29] Report aggregate CI scores in vmafossexec

Also write pool method to the xml log when it's specified.
---
 wrapper/src/vmaf.cpp | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index 963433faf..9475c647e 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -1006,6 +1006,16 @@ double RunVmaf(const char* fmt, int width, int height,
 
     std::vector<std::string> result_keys = result.get_keys();
 
+    double aggregate_bagging = 0.0, aggregate_stddev = 0.0, aggregate_ci95_low = 0.0, aggregate_ci95_high = 0.0;
+    if (result.has_scores("bagging"))
+        aggregate_bagging = result.get_score("bagging");
+    if (result.has_scores("stddev"))
+        aggregate_stddev = result.get_score("stddev");
+    if (result.has_scores("ci95_low"))
+        aggregate_ci95_low = result.get_score("ci95_low");
+    if (result.has_scores("ci95_high"))
+        aggregate_ci95_high = result.get_score("ci95_high");
+
     double aggregate_psnr = 0.0, aggregate_ssim = 0.0, aggregate_ms_ssim = 0.0;
     if (result.has_scores("psnr"))
         aggregate_psnr = result.get_score("psnr");
@@ -1017,6 +1027,14 @@ double RunVmaf(const char* fmt, int width, int height,
     if (pool_method)
     {
         printf("VMAF score (%s) = %f\n", pool_method, aggregate_vmaf);
+        if (aggregate_bagging)
+            printf("Bagging score (%s) = %f\n", pool_method, aggregate_bagging);
+        if (aggregate_stddev)
+            printf("StdDev score (%s) = %f\n", pool_method, aggregate_stddev);
+        if (aggregate_ci95_low)
+            printf("CI95_low score (%s) = %f\n", pool_method, aggregate_ci95_low);
+        if (aggregate_ci95_high)
+            printf("CI95_high score (%s) = %f\n", pool_method, aggregate_ci95_high);
         if (aggregate_psnr)
             printf("PSNR score (%s) = %f\n", pool_method, aggregate_psnr);
         if (aggregate_ssim)
@@ -1027,6 +1045,14 @@ double RunVmaf(const char* fmt, int width, int height,
     else // default
     {
         printf("VMAF score = %f\n", aggregate_vmaf);
+        if (aggregate_bagging)
+            printf("Bagging score = %f\n", aggregate_bagging);
+        if (aggregate_stddev)
+            printf("StdDev score = %f\n", aggregate_stddev);
+        if (aggregate_ci95_low)
+            printf("CI95_low score = %f\n", aggregate_ci95_low);
+        if (aggregate_ci95_high)
+            printf("CI95_high score = %f\n", aggregate_ci95_high);
         if (aggregate_psnr)
             printf("PSNR score = %f\n", aggregate_psnr);
         if (aggregate_ssim)
@@ -1130,12 +1156,22 @@ double RunVmaf(const char* fmt, int width, int height,
         auto info_node = xml_root.append_child("fyi");
         info_node.append_attribute("numOfFrames") = (int)num_frames_subsampled;
         info_node.append_attribute("aggregateVMAF") = aggregate_vmaf;
+        if (aggregate_bagging)
+            info_node.append_attribute("aggregateBagging") = aggregate_bagging;
+        if (aggregate_stddev)
+            info_node.append_attribute("aggregateStdDev") = aggregate_stddev;
+        if (aggregate_ci95_low)
+            info_node.append_attribute("aggregateCI95_low") = aggregate_ci95_low;
+        if (aggregate_ci95_high)
+            info_node.append_attribute("aggregateCI95_high") = aggregate_ci95_high;
         if (aggregate_psnr)
             info_node.append_attribute("aggregatePSNR") = aggregate_psnr;
         if (aggregate_ssim)
             info_node.append_attribute("aggregateSSIM") = aggregate_ssim;
         if (aggregate_ms_ssim)
             info_node.append_attribute("aggregateMS_SSIM") = aggregate_ms_ssim;
+        if (pool_method)
+            info_node.append_attribute("poolMethod") = pool_method;
         info_node.append_attribute("execFps") = exec_fps;
 #if TIME_TEST_ENABLE
 		info_node.append_attribute("timeTaken") = time_taken;

From 85ddfed11c59bcf8e3863e33d811c59b37806ab0 Mon Sep 17 00:00:00 2001
From: Holy Wu <HolyWu@users.noreply.github.com>
Date: Tue, 12 Feb 2019 14:44:42 +0800
Subject: [PATCH 29/29] Fix empty model name in log on Windows

Windows users may also use forward slashes besides backslashes in a file path. Looking for only backslashes results in an empty string being returned when Windows users type forward slashes in the file path.
---
 wrapper/src/vmaf.cpp | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp
index 9475c647e..87650d185 100644
--- a/wrapper/src/vmaf.cpp
+++ b/wrapper/src/vmaf.cpp
@@ -46,7 +46,7 @@ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
 #endif
 
 inline double _round_to_digit(double val, int digit);
-string _get_file_name(const std::string& s);
+std::string _get_file_name(const std::string& s);
 
 void SvmDelete::operator()(void *svm)
 {
@@ -1206,16 +1206,11 @@ inline double _round_to_digit(double val, int digit)
     return _round(val * m) / m;
 }
 
-string _get_file_name(const std::string& s)
+std::string _get_file_name(const std::string& s)
 {
-   char sep = '/';
-#ifdef _WIN32
-   sep = '\\';
-#endif
-   size_t i = s.rfind(sep, s.length());
-   if (i != string::npos) {
-      return(s.substr(i+1, s.length() - i));
-   }
-   return("");
+    size_t i = s.find_last_of("/\\", s.length());
+    if (i != std::string::npos) {
+        return(s.substr(i + 1, s.length() - i));
+    }
+    return("");
 }
-