From 231517429b5387eaf1226105d16a89a29404279f Mon Sep 17 00:00:00 2001 From: 2uropa Date: Fri, 4 Jan 2019 15:28:03 +0900 Subject: [PATCH 01/29] Modify some windows build options (#285) * Fix visual studio MULTI-THREADING compile error (#283) * Add static-library build configuration. (#284) --- pthreads/COPKG/pthreads.vcxproj | 45 +++++++++++++++++ ptools/ptools.vcxproj | 75 +++++++++++++++++++++++++++ vmaf.sln | 14 +++++ wrapper/wrapper.vcxproj | 90 +++++++++++++++++++++++++++++++-- wrapper/wrapper.vcxproj.filters | 10 +++- 5 files changed, 230 insertions(+), 4 deletions(-) diff --git a/pthreads/COPKG/pthreads.vcxproj b/pthreads/COPKG/pthreads.vcxproj index e6f6a739a..7e5933f81 100644 --- a/pthreads/COPKG/pthreads.vcxproj +++ b/pthreads/COPKG/pthreads.vcxproj @@ -1,10 +1,26 @@  + + DebugLib + Win32 + + + DebugLib + x64 + Debug Win32 + + ReleaseLib + Win32 + + + ReleaseLib + x64 + Release Win32 @@ -46,6 +62,7 @@ --> true + true false MultiByte @@ -109,6 +126,19 @@ _DEBUG;%(PreprocessorDefinitions) MultiThreadedDebug Default + true + + + true + + + + + Disabled + _DEBUG;%(PreprocessorDefinitions) + MultiThreadedDebugDLL + Default + true true @@ -121,6 +151,21 @@ true NDEBUG;%(PreprocessorDefinitions) MultiThreaded + true + + + true + true + + + + + MaxSpeed + true + true + NDEBUG;%(PreprocessorDefinitions) + MultiThreadedDLL + true true diff --git a/ptools/ptools.vcxproj b/ptools/ptools.vcxproj index 06667f304..046e93b2c 100644 --- a/ptools/ptools.vcxproj +++ b/ptools/ptools.vcxproj @@ -1,10 +1,18 @@  + + DebugLib + x64 + Debug x64 + + ReleaseLib + x64 + Release x64 @@ -49,6 +57,12 @@ v140 MultiByte + + StaticLibrary + true + v140 + MultiByte + StaticLibrary false @@ -56,6 +70,13 @@ true MultiByte + + StaticLibrary + false + v140 + true + MultiByte + @@ -63,20 +84,36 @@ + + + + + + static $(SolutionDir)\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ + + static + $(SolutionDir)\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + static 
$(SolutionDir)\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ + + static + $(SolutionDir)\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + @@ -87,6 +124,23 @@ true opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) MultiThreadedDebug + true + + + Windows + + + + + + + Level3 + Disabled + PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) + MultiThreadedDebugDLL + true Windows @@ -104,6 +158,27 @@ true opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) MultiThreaded + true + + + Windows + true + true + + + + + Level3 + + + MaxSpeed + true + true + PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) + MultiThreadedDLL + true Windows diff --git a/vmaf.sln b/vmaf.sln index 2d91e566d..ea83da7ab 100644 --- a/vmaf.sln +++ b/vmaf.sln @@ -19,21 +19,35 @@ EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 + DebugLib|x64 = DebugLib|x64 Release|x64 = Release|x64 + ReleaseLib|x64 = ReleaseLib|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Debug|x64.ActiveCfg = Debug|x64 {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Debug|x64.Build.0 = Debug|x64 + {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.DebugLib|x64.ActiveCfg = DebugLib|x64 + {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.DebugLib|x64.Build.0 = DebugLib|x64 {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Release|x64.ActiveCfg = Release|x64 {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.Release|x64.Build.0 = Release|x64 + {C2D3FD1E-9068-494D-9655-88CE906B4C8B}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64 + 
{C2D3FD1E-9068-494D-9655-88CE906B4C8B}.ReleaseLib|x64.Build.0 = ReleaseLib|x64 {3F07B371-1B81-477E-886C-0E079B0A6803}.Debug|x64.ActiveCfg = Debug|x64 {3F07B371-1B81-477E-886C-0E079B0A6803}.Debug|x64.Build.0 = Debug|x64 + {3F07B371-1B81-477E-886C-0E079B0A6803}.DebugLib|x64.ActiveCfg = DebugLib|x64 + {3F07B371-1B81-477E-886C-0E079B0A6803}.DebugLib|x64.Build.0 = DebugLib|x64 {3F07B371-1B81-477E-886C-0E079B0A6803}.Release|x64.ActiveCfg = Release|x64 {3F07B371-1B81-477E-886C-0E079B0A6803}.Release|x64.Build.0 = Release|x64 + {3F07B371-1B81-477E-886C-0E079B0A6803}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64 + {3F07B371-1B81-477E-886C-0E079B0A6803}.ReleaseLib|x64.Build.0 = ReleaseLib|x64 {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.ActiveCfg = Debug|x64 {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.Build.0 = Debug|x64 + {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.DebugLib|x64.ActiveCfg = DebugLib|x64 + {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.DebugLib|x64.Build.0 = DebugLib|x64 {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.ActiveCfg = Release|x64 {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.Build.0 = Release|x64 + {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.ReleaseLib|x64.ActiveCfg = ReleaseLib|x64 + {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.ReleaseLib|x64.Build.0 = ReleaseLib|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/wrapper/wrapper.vcxproj b/wrapper/wrapper.vcxproj index e54b48ce7..639953500 100644 --- a/wrapper/wrapper.vcxproj +++ b/wrapper/wrapper.vcxproj @@ -1,10 +1,18 @@  + + DebugLib + x64 + Debug x64 + + ReleaseLib + x64 + Release x64 @@ -24,6 +32,12 @@ v140 MultiByte + + StaticLibrary + true + v140 + MultiByte + Application false @@ -31,6 +45,13 @@ true MultiByte + + StaticLibrary + false + v140 + true + MultiByte + @@ -38,28 +59,62 @@ + + + + + + true static + + true + static + false static + + false + static + Level3 Disabled - 
_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) MultiThreadedDebug + true + + + Console + true + libpthread-static.lib;%(AdditionalDependencies) + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + + + + + + + Level3 + Disabled + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + MultiThreadedDebugDLL + true Console @@ -76,11 +131,36 @@ MaxSpeed true true - _CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) MultiThreaded AdvancedVectorExtensions2 + true + + + Console + true + true + true + libpthread-static.lib;%(AdditionalDependencies) + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + + + + + Level3 + + + MaxSpeed + true + true + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + 
MultiThreadedDLL + AdvancedVectorExtensions2 + true Console @@ -98,6 +178,7 @@ + @@ -119,6 +200,7 @@ + @@ -134,6 +216,7 @@ + @@ -151,6 +234,7 @@ + @@ -164,4 +248,4 @@ - + \ No newline at end of file diff --git a/wrapper/wrapper.vcxproj.filters b/wrapper/wrapper.vcxproj.filters index 34fa83ad9..ed47ca6e3 100644 --- a/wrapper/wrapper.vcxproj.filters +++ b/wrapper/wrapper.vcxproj.filters @@ -76,6 +76,10 @@ feature + + feature + + @@ -168,6 +172,10 @@ + + feature + + @@ -183,4 +191,4 @@ {4b256925-f7e1-49e2-9c3b-33c128ce4e37} - + \ No newline at end of file From 76dc78e601dd7983f85cb1699ff383a9cd59570c Mon Sep 17 00:00:00 2001 From: Christos Bampis Date: Mon, 7 Jan 2019 15:09:58 -0800 Subject: [PATCH 02/29] Add matlab imports in run_testing; run mex for ST-MAD. --- matlab/STMAD_2011_MatlabCode/Example.m | 17 ++----------- python/script/run_testing.py | 1 + .../src/vmaf/core/matlab_feature_extractor.py | 25 +++++++++++++++++-- python/src/vmaf/core/matlab_quality_runner.py | 5 ++++ 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/matlab/STMAD_2011_MatlabCode/Example.m b/matlab/STMAD_2011_MatlabCode/Example.m index b513ef2bc..13670002f 100644 --- a/matlab/STMAD_2011_MatlabCode/Example.m +++ b/matlab/STMAD_2011_MatlabCode/Example.m @@ -2,22 +2,9 @@ clear; close all; -% Wid = 176; -% Hei = 144; - -% OrgFile = 'foreman_org_qcif.yuv'; - -% DstFile = 'foreman_dst_qcif.yuv'; - -% MadVals = STMAD_index(OrgFile, DstFile, Wid, Hei); -% MadVals = run_stmad(OrgFile, DstFile, Wid, Hei); - Hei = 324; Wid = 576; -OrgFile = ... - '/home/cbampis/Projects/stash/MCE/vmaf_oss/vmaf/python/test/resource/yuv/src01_hrc00_576x324.yuv'; -DstFile = ... 
- '/home/cbampis/Projects/stash/MCE/vmaf_oss/vmaf/python/test/resource/yuv/src01_hrc01_576x324.yuv'; +OrgFile = '../../python/test/resource/yuv/src01_hrc00_576x324.yuv'; +DstFile = '../../python/test/resource/yuv/src01_hrc01_576x324.yuv'; run_stmad(OrgFile, DstFile, Wid, Hei); - diff --git a/python/script/run_testing.py b/python/script/run_testing.py index 885cb5087..7ae2621dc 100755 --- a/python/script/run_testing.py +++ b/python/script/run_testing.py @@ -12,6 +12,7 @@ from vmaf.core.result_store import FileSystemResultStore from vmaf.tools.misc import import_python_file, get_cmd_option, cmd_option_exists from vmaf.core.quality_runner import QualityRunner, VmafQualityRunner, BootstrapVmafQualityRunner +from vmaf.core.matlab_quality_runner import STMADQualityRunner, SpEEDMatlabQualityRunner, StrredQualityRunner, StrredOptQualityRunner from vmaf.routine import run_test_on_dataset, print_matplotlib_warning from vmaf.tools.stats import ListStats diff --git a/python/src/vmaf/core/matlab_feature_extractor.py b/python/src/vmaf/core/matlab_feature_extractor.py index 1953589bb..480172e60 100644 --- a/python/src/vmaf/core/matlab_feature_extractor.py +++ b/python/src/vmaf/core/matlab_feature_extractor.py @@ -154,8 +154,6 @@ def _generate_result(self, asset): log_file_path=log_file_path, ) - # print(strred_cmd) - if self.logger: self.logger.info(strredopt_cmd) @@ -296,6 +294,28 @@ class STMADFeatureExtractor(MatlabFeatureExtractor): MATLAB_WORKSPACE = VmafConfig.root_path('matlab', 'STMAD_2011_MatlabCode') + # compile necessary functions; need to use mex from within matlab + def _custom_init(self): + + def run_stmad_cmd(stmad_cmd): + + current_dir = os.getcwd() + '/' + os.chdir(self.MATLAB_WORKSPACE) + run_process(stmad_cmd, shell=True) + os.chdir(current_dir) + + stmad_mex_cmd_1 = '''{matlab} -nodisplay -nosplash -nodesktop -r "mex ical_std.c; exit;"'''.format( + matlab=VmafExternalConfig.get_and_assert_matlab(), + ) + + run_stmad_cmd(stmad_mex_cmd_1) + + stmad_mex_cmd_2 = 
'''{matlab} -nodisplay -nosplash -nodesktop -r "mex ical_stat.c; exit;"'''.format( + matlab=VmafExternalConfig.get_and_assert_matlab(), + ) + + run_stmad_cmd(stmad_mex_cmd_2) + @classmethod def _assert_an_asset(cls, asset): super(STMADFeatureExtractor, cls)._assert_an_asset(asset) @@ -326,6 +346,7 @@ def _generate_result(self, asset): h=quality_height, log_file_path=log_file_path, ) + if self.logger: self.logger.info(stmad_cmd) diff --git a/python/src/vmaf/core/matlab_quality_runner.py b/python/src/vmaf/core/matlab_quality_runner.py index c565779ca..63acec42a 100644 --- a/python/src/vmaf/core/matlab_quality_runner.py +++ b/python/src/vmaf/core/matlab_quality_runner.py @@ -1,3 +1,7 @@ +import os + +from vmaf.config import VmafExternalConfig, VmafConfig +from vmaf.tools.misc import run_process from vmaf.core.feature_assembler import FeatureAssembler from vmaf.core.matlab_feature_extractor import StrredFeatureExtractor, StrredOptFeatureExtractor, SpEEDMatlabFeatureExtractor, STMADFeatureExtractor from vmaf.core.quality_runner import QualityRunner @@ -162,6 +166,7 @@ def _remove_result(self, asset): class STMADQualityRunner(QualityRunner): + TYPE = 'STMAD' VERSION = 'F' + STMADFeatureExtractor.VERSION + '-1.1' From b87b5c74471b790c179dd73de2bbb44b8a76ff63 Mon Sep 17 00:00:00 2001 From: Christos Bampis Date: Mon, 7 Jan 2019 15:51:31 -0800 Subject: [PATCH 03/29] Add documentation for Matlab implementations. --- README.md | 4 ++++ resource/doc/matlab_usage.md | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 resource/doc/matlab_usage.md diff --git a/README.md b/README.md index 4e799b715..8e04f861e 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,10 @@ Besides the default VMAF model which predicts the quality of videos displayed on Since VDK v1.3.7 (June 2018), we have introduced a way to quantify the level of confidence that a VMAF prediction entails. 
Each VMAF prediction score now can come with a 95% confidence interval (CI), which quantifies the level of confidence that the prediction lies within the interval. Refer to the [VMAF confidence interval](resource/doc/conf_interval.md) page for more details. +## Matlab Functionality + +Besides the Python/C/C++ part of the repository, we also introduced a number of algorithms that are implemented in Matlab. For example, users can calculate ST-RRED, ST-MAD, SpEED-QA, and BRISQUE. For more details, see the [Matlab Usage](resource/doc/matlab_usage.md) page for more details. + ## References Refer to the [references](resource/doc/references.md) page. diff --git a/resource/doc/matlab_usage.md b/resource/doc/matlab_usage.md new file mode 100644 index 000000000..bb8638b77 --- /dev/null +++ b/resource/doc/matlab_usage.md @@ -0,0 +1,40 @@ +Matlab Usage +=================== + +## Prerequisites + +To be able to use Matlab, you need to first successfully download and activate Matlab, as described [here](https://www.mathworks.com/). Then, add a line to python/src/vmaf/externals.py, like: + +``` +MATLAB_PATH = +``` + +For example (mac OSX): + +``` +MATLAB_PATH = "/Applications/MATLAB_R2017a.app/bin/matlab" +``` + +## Available Algorithms + +The available algorithms are ST-MAD [1], ST-RRED [2], SpEED-QA [3] and BRISQUE [4]. + +Example usage for ST-MAD, ST-RRED and SpEED-QA with the run_testing script: + +./run_testing quality_type dataset_file + +where quality_type can be STMAD (for ST-MAD), STRRED (for ST-RRED) and SpEED_Matlab (for SpEED-QA). We have also implemented a computationally efficient version of ST-RRED (use STRREDOpt as the quality_type), which produces numerically identical results to ST-RRED. + +Example usage for BRISQUE: + +./run_vmaf yuv_420p 1920 1080 NFLX_dataset_public/ref/OldTownCross_25fps.yuv NFLX_dataset_public/dis/OldTownCross_90_1080_4300.yuv --model model/vmaf_brisque_all_v0.0rc.pkl + +## References + +[1] P. V. Vu, C. T. Vu, and D. M. 
Chandler, "A spatiotemporal mostapparent-distortion model for video quality assessment," IEEE Int’l Conf. Image Process., pp. 2505–2508, 2011. + +[2] R. Soundararajan and A. C. Bovik, "Video quality assessment by reduced reference spatio-temporal entropic differencing," IEEE Trans. Circ. Syst. Video Technol., vol. 23, no. 4, pp. 684–694, Apr. 2013. + +[3] C. G. Bampis, P. Gupta, R. Soundararajan, and A. C. Bovik, "SpEEDQA: Spatial efficient entropic differencing for image and video quality," IEEE Signal Process. Lett., vol. 24, no. 9, pp. 1333–1337, 2017. + +[4] A. Mittal, A. K. Moorthy, and A. C. Bovik, "No-reference image quality assessment in the spatial domain," IEEE Trans. Image Process., vol. 21, no. 12, pp. 4695–4708, Dec. 2012. From f88761224162f0b6337fc91e5c117fcca4baa9e2 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 10 Jan 2019 13:47:36 -0800 Subject: [PATCH 04/29] Update conf_interval.md --- resource/doc/conf_interval.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource/doc/conf_interval.md b/resource/doc/conf_interval.md index 03eed94fa..8b0034bed 100644 --- a/resource/doc/conf_interval.md +++ b/resource/doc/conf_interval.md @@ -11,7 +11,7 @@ There are two ways to perform bootstrapping on VMAF. The first one is called pla ### Run in Command Line -To enable CI, use the option `--ci` in the command line tools with a bootstrapping model such as `model/vmaf_rb_v0.6.2/vmaf_rb_v0.6.2.pkl`. +To enable CI, use the option `--ci` in the command line tools with a bootstrapping model such as `model/vmaf_rb_v0.6.2/vmaf_rb_v0.6.2.pkl`. The `--ci` option is available for both `./run_vmaf` and `./wrapper/vmafossexec`. In [libvmaf](libvmaf.md), CI can be enabled by setting the argument `enable_conf_interval` to 1. 
For example, running From 3c169056c20a4d158c7f08fd3d072cc04c29234e Mon Sep 17 00:00:00 2001 From: fishjam Date: Sat, 12 Jan 2019 03:05:42 +0800 Subject: [PATCH 05/29] refactor windows solution: (#291) * refactor windows solution: 1.add libvmaf(static lib), get information from wrapper; 2.remove unused DebugLib and ReleaseLib(since there is libvmaf, does not need build lib from wrapper.vcxproj) 3.remove unused x86 configuration. * 1.add project for feature tools(ref feature\Makefile) and add some missing files in libvmaf 2.add feature/examples.bat(ref feature\examples) and will copy to output dir while building; 3.add BuildForWindows.md * Update conf_interval.md --- BuildForWindows.md | 8 + feature/examples.bat | 33 +++ feature/vs2015/moment/moment.vcxproj | 99 ++++++++ feature/vs2015/moment/moment.vcxproj.filters | 22 ++ feature/vs2015/ms_ssim/ms_ssim.vcxproj | 99 ++++++++ .../vs2015/ms_ssim/ms_ssim.vcxproj.filters | 22 ++ feature/vs2015/psnr/psnr.vcxproj | 99 ++++++++ feature/vs2015/psnr/psnr.vcxproj.filters | 22 ++ feature/vs2015/ssim/ssim.vcxproj | 95 +++++++ feature/vs2015/ssim/ssim.vcxproj.filters | 22 ++ feature/vs2015/vmaf/vmaf.vcxproj | 95 +++++++ feature/vs2015/vmaf/vmaf.vcxproj.filters | 22 ++ pthreads/COPKG/pthreads.vcxproj | 51 ---- ptools/ptools.vcxproj | 73 ------ vmaf.sln | 76 +++++- wrapper/libvmaf.vcxproj | 160 ++++++++++++ wrapper/libvmaf.vcxproj.filters | 240 ++++++++++++++++++ wrapper/wrapper.vcxproj | 158 +----------- wrapper/wrapper.vcxproj.filters | 188 -------------- 19 files changed, 1110 insertions(+), 474 deletions(-) create mode 100644 BuildForWindows.md create mode 100644 feature/examples.bat create mode 100644 feature/vs2015/moment/moment.vcxproj create mode 100644 feature/vs2015/moment/moment.vcxproj.filters create mode 100644 feature/vs2015/ms_ssim/ms_ssim.vcxproj create mode 100644 feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters create mode 100644 feature/vs2015/psnr/psnr.vcxproj create mode 100644 
feature/vs2015/psnr/psnr.vcxproj.filters create mode 100644 feature/vs2015/ssim/ssim.vcxproj create mode 100644 feature/vs2015/ssim/ssim.vcxproj.filters create mode 100644 feature/vs2015/vmaf/vmaf.vcxproj create mode 100644 feature/vs2015/vmaf/vmaf.vcxproj.filters create mode 100644 wrapper/libvmaf.vcxproj create mode 100644 wrapper/libvmaf.vcxproj.filters diff --git a/BuildForWindows.md b/BuildForWindows.md new file mode 100644 index 000000000..1200f5269 --- /dev/null +++ b/BuildForWindows.md @@ -0,0 +1,8 @@ +# Prepare + - Visual Studio 2015 on Windows + +# Steps + - 1.open [vmaf.sln](vmaf.sln) in Visual Studio 2015 + - 2.Select Build => Batch Build from menu + - 3.Select the Solution Config as your want or Select All, then Build. + - 4.After Build, you will find all the build result in $(SolutionDir)/x64/$(Configuration), and there is examples.bat that you can run. diff --git a/feature/examples.bat b/feature/examples.bat new file mode 100644 index 000000000..fac258ff9 --- /dev/null +++ b/feature/examples.bat @@ -0,0 +1,33 @@ +rem refer feature/examples + +vmaf +echo "" + +echo "run adm:" +vmaf adm yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run ansnr:" +vmaf ansnr yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run motion:" +vmaf motion yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run vif:" +vmaf vif yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run all:" +vmaf all yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run psnr:" +psnr yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv 
..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run 2nd moment:" +moment 2 yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv 576 324 + +echo "run ssim:" +ssim yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "run ms_ssim:" +ms_ssim yuv420p ..\..\python\test\resource\yuv\src01_hrc00_576x324.yuv ..\..\python\test\resource\yuv\src01_hrc01_576x324.yuv 576 324 + +echo "done." diff --git a/feature/vs2015/moment/moment.vcxproj b/feature/vs2015/moment/moment.vcxproj new file mode 100644 index 000000000..4215cd149 --- /dev/null +++ b/feature/vs2015/moment/moment.vcxproj @@ -0,0 +1,99 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + {D67BDBF2-D42F-465D-BABD-A381BFFAA373} + Win32Proj + moment + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + true + true + MultiThreadedDebug + + + Console + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + true + true + MultiThreaded + + + Console + true + true + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;%(AdditionalDependencies) + + + + + + \ No 
newline at end of file diff --git a/feature/vs2015/moment/moment.vcxproj.filters b/feature/vs2015/moment/moment.vcxproj.filters new file mode 100644 index 000000000..4acb18b59 --- /dev/null +++ b/feature/vs2015/moment/moment.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/feature/vs2015/ms_ssim/ms_ssim.vcxproj b/feature/vs2015/ms_ssim/ms_ssim.vcxproj new file mode 100644 index 000000000..c69acbfc9 --- /dev/null +++ b/feature/vs2015/ms_ssim/ms_ssim.vcxproj @@ -0,0 +1,99 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + {CF8FA427-306B-4803-9F23-31C229A630B6} + Win32Proj + ms_ssim + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + true + MultiThreadedDebug + + + Console + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + true + MultiThreaded + + + Console + true + true + true + 
$(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file diff --git a/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters b/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters new file mode 100644 index 000000000..3de31b095 --- /dev/null +++ b/feature/vs2015/ms_ssim/ms_ssim.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/feature/vs2015/psnr/psnr.vcxproj b/feature/vs2015/psnr/psnr.vcxproj new file mode 100644 index 000000000..dcaa53f6d --- /dev/null +++ b/feature/vs2015/psnr/psnr.vcxproj @@ -0,0 +1,99 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + {2DC3E418-09C6-49F4-A8DA-04C614D6016D} + Win32Proj + psnr + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + true + true + MultiThreadedDebug + + + Console + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + 
..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + true + true + MultiThreaded + + + Console + true + true + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file diff --git a/feature/vs2015/psnr/psnr.vcxproj.filters b/feature/vs2015/psnr/psnr.vcxproj.filters new file mode 100644 index 000000000..479b15852 --- /dev/null +++ b/feature/vs2015/psnr/psnr.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/feature/vs2015/ssim/ssim.vcxproj b/feature/vs2015/ssim/ssim.vcxproj new file mode 100644 index 000000000..f51ef96a8 --- /dev/null +++ b/feature/vs2015/ssim/ssim.vcxproj @@ -0,0 +1,95 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + {418D8FFD-D23A-4C56-8A94-D4B9D39083D1} + Win32Proj + ssim + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreadedDebug + + + Console + true + libvmaf.lib;%(AdditionalDependencies) + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + + + + + Level3 + + + MaxSpeed + true + true + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + + + Console + true + 
true + true + libvmaf.lib;%(AdditionalDependencies) + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + + + + + + \ No newline at end of file diff --git a/feature/vs2015/ssim/ssim.vcxproj.filters b/feature/vs2015/ssim/ssim.vcxproj.filters new file mode 100644 index 000000000..5c5c46ba4 --- /dev/null +++ b/feature/vs2015/ssim/ssim.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/feature/vs2015/vmaf/vmaf.vcxproj b/feature/vs2015/vmaf/vmaf.vcxproj new file mode 100644 index 000000000..fcc0f75ac --- /dev/null +++ b/feature/vs2015/vmaf/vmaf.vcxproj @@ -0,0 +1,95 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + {054010E9-3004-4C24-B0F3-DCCE36D6B436} + Win32Proj + vmaf + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + 
..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) + libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file diff --git a/feature/vs2015/vmaf/vmaf.vcxproj.filters b/feature/vs2015/vmaf/vmaf.vcxproj.filters new file mode 100644 index 000000000..6b6a90d60 --- /dev/null +++ b/feature/vs2015/vmaf/vmaf.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/pthreads/COPKG/pthreads.vcxproj b/pthreads/COPKG/pthreads.vcxproj index 7e5933f81..4f9cc09c4 100644 --- a/pthreads/COPKG/pthreads.vcxproj +++ b/pthreads/COPKG/pthreads.vcxproj @@ -1,30 +1,6 @@  - - DebugLib - Win32 - - - DebugLib - x64 - - - Debug - Win32 - - - ReleaseLib - Win32 - - - ReleaseLib - x64 - - - Release - Win32 - Debug x64 @@ -62,7 +38,6 @@ --> true - true false MultiByte @@ -132,18 +107,6 @@ true - - - Disabled - _DEBUG;%(PreprocessorDefinitions) - MultiThreadedDebugDLL - Default - true - - - true - - MaxSpeed @@ -158,20 +121,6 @@ true - - - MaxSpeed - true - true - NDEBUG;%(PreprocessorDefinitions) - MultiThreadedDLL - true - - - true - true - - C8GugwUyC06 zb90B)^FLrq+u?3M|I=;%ziyxZXuH9i{6M`Ip~>glo+NPd*)B5f!w3Y9pQBF=`1|8f2; zum>-XbclZdeLqJU?wxQK#p{ndjBh*z)#?Nc>%SGad8W~C{dx9>>jn(1@E-$qa%R9@ z*)p6T>h3&$!?QG8`1Fh4%m2mBpRs*f1)hK4c?wRxZU=sjPv;?h_Hu6%LU{g=ctvQE zr6I{%|3hfs94@wg2J%;e1oGq)Q!3#9Vu9m7&r@`YpR1Opt$$pm=T2U3_M}^@b{BM! 
n{{lCkBE8i!g^xUg$?#F}|9=#9@mqhYp0L0Coj>^K`v3nQXyyId diff --git a/pthreads/COPKG/pthreads.sln b/pthreads/COPKG/pthreads.sln deleted file mode 100644 index 0d0483282..000000000 --- a/pthreads/COPKG/pthreads.sln +++ /dev/null @@ -1,28 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 -VisualStudioVersion = 14.0.25420.1 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pthreads", "pthreads.vcxproj", "{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.ActiveCfg = Debug|x64 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x64.Build.0 = Debug|x64 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x86.ActiveCfg = Debug|Win32 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Debug|x86.Build.0 = Debug|Win32 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.ActiveCfg = Release|x64 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x64.Build.0 = Release|x64 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x86.ActiveCfg = Release|Win32 - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}.Release|x86.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/pthreads/COPKG/pthreads.vcxproj b/pthreads/COPKG/pthreads.vcxproj index 4f9cc09c4..236855386 100644 --- a/pthreads/COPKG/pthreads.vcxproj +++ b/pthreads/COPKG/pthreads.vcxproj @@ -11,148 +11,98 @@ + true {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} - pthreads - pthread + Win32Proj + pthreads 8.1 - + StaticLibrary + true v140 - Cdecl - libpthread - - - - - - - - - - - true - false MultiByte - - + + + + + + - - - - - -static - -stdcall - -static-stdcall - 
$(LibName)$(OutNameSuffix) - $(SolutionDir) - $(MSBuildThisFileDirectory)..\ - $(SolutionDir)\$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - $(OutDir)bin\ - $(OutDir)lib\ - $(OutName) - - + + + ..\;%(AdditionalIncludeDirectories) Level3 - WIN32;_LIB;PTW32_BUILD;%(PreprocessorDefinitions) - PTW32_STATIC_LIB;%(PreprocessorDefinitions) + true + true + PTW32_BUILD_INLINED;PTW32_STATIC_LIB;_DEBUG;%(PreprocessorDefinitions) + MultiThreadedDebug Windows - true - + - Disabled - _DEBUG;%(PreprocessorDefinitions) - MultiThreadedDebug - Default - true - - - true - - - - - MaxSpeed - true - true - NDEBUG;%(PreprocessorDefinitions) - MultiThreaded - true + ..\;%(AdditionalIncludeDirectories) + Level3 + true + PTW32_BUILD_INLINED;PTW32_STATIC_LIB;NDEBUG;%(PreprocessorDefinitions) + MultiThreaded + false + Windows true true - - - + + + + - - - - - + + + + + + diff --git a/pthreads/COPKG/pthreads.vcxproj.filters b/pthreads/COPKG/pthreads.vcxproj.filters index df9d00a3f..f28de9c57 100644 --- a/pthreads/COPKG/pthreads.vcxproj.filters +++ b/pthreads/COPKG/pthreads.vcxproj.filters @@ -7,7 +7,7 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd + h;hh;hpp;hxx;hm;inl;inc;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} @@ -18,6 +18,9 @@ Source Files + + Source Files + Source Files @@ -36,6 +39,9 @@ Source Files + + Source Files + Source Files @@ -51,6 +57,9 @@ Source Files + + Source Files + Source Files @@ -63,6 +72,9 @@ Source Files + + Source Files + Source Files @@ -72,24 +84,18 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - + + Header Files + + + Header Files + Header Files - + Header Files diff --git a/pthreads/COPKG/pthreads.vcxproj.user b/pthreads/COPKG/pthreads.vcxproj.user deleted file mode 100644 index abe8dd896..000000000 --- a/pthreads/COPKG/pthreads.vcxproj.user +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/ptools/ptools.vcxproj b/ptools/ptools.vcxproj index 
fe78ce86c..452735c9d 100644 --- a/ptools/ptools.vcxproj +++ b/ptools/ptools.vcxproj @@ -10,33 +10,8 @@ x64 - - - - - - - - - - - - - - - - - - - - - - - - - - + true {3F07B371-1B81-477E-886C-0E079B0A6803} Win32Proj ptools @@ -59,7 +34,8 @@ - + + @@ -67,27 +43,16 @@ - - static - $(SolutionDir)\$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - - - static - $(SolutionDir)\$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - + + - - + opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) Level3 - Disabled - PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) true - opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) - MultiThreadedDebug true + PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_DEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDebug Windows @@ -95,17 +60,12 @@ - Level3 - - - MaxSpeed - true - true - PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) - true opencontainers_1_8_4\include;..\pthreads;%(AdditionalIncludeDirectories) - MultiThreaded + Level3 true + PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;NDEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreaded + false Windows @@ -113,5 +73,27 @@ true + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ptools/ptools.vcxproj.filters b/ptools/ptools.vcxproj.filters new file mode 100644 index 000000000..bf6f7486d --- /dev/null +++ b/ptools/ptools.vcxproj.filters @@ -0,0 +1,69 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source 
Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/vmaf.sln b/vmaf.sln index ab12c1272..f21121ec8 100644 --- a/vmaf.sln +++ b/vmaf.sln @@ -3,17 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 VisualStudioVersion = 14.0.25420.1 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmafossexec", "wrapper\wrapper.vcxproj", "{C2D3FD1E-9068-494D-9655-88CE906B4C8B}" - ProjectSection(ProjectDependencies) = postProject - {3F07B371-1B81-477E-886C-0E079B0A6803} = {3F07B371-1B81-477E-886C-0E079B0A6803} - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} - {5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmafossexec", "wrapper\vmafossexec.vcxproj", "{C2D3FD1E-9068-494D-9655-88CE906B4C8B}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ptools", "ptools\ptools.vcxproj", "{3F07B371-1B81-477E-886C-0E079B0A6803}" - ProjectSection(ProjectDependencies) = postProject - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pthreads", "pthreads\COPKG\pthreads.vcxproj", "{03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E}" EndProject @@ -21,31 +13,15 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libvmaf", "wrapper\libvmaf. 
EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{3202BE63-8C0E-4405-9B6D-DFD6F99B3CB4}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moment", "feature\vs2015\moment\moment.vcxproj", "{D67BDBF2-D42F-465D-BABD-A381BFFAA373}" - ProjectSection(ProjectDependencies) = postProject - {5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moment", "feature\msvc\moment.vcxproj", "{D67BDBF2-D42F-465D-BABD-A381BFFAA373}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ms_ssim", "feature\vs2015\ms_ssim\ms_ssim.vcxproj", "{CF8FA427-306B-4803-9F23-31C229A630B6}" - ProjectSection(ProjectDependencies) = postProject - {5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ms_ssim", "feature\msvc\ms_ssim.vcxproj", "{CF8FA427-306B-4803-9F23-31C229A630B6}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "psnr", "feature\vs2015\psnr\psnr.vcxproj", "{2DC3E418-09C6-49F4-A8DA-04C614D6016D}" - ProjectSection(ProjectDependencies) = postProject - {5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "psnr", "feature\msvc\psnr.vcxproj", "{2DC3E418-09C6-49F4-A8DA-04C614D6016D}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ssim", "feature\vs2015\ssim\ssim.vcxproj", "{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}" - ProjectSection(ProjectDependencies) = postProject - {5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ssim", "feature\msvc\ssim.vcxproj", "{418D8FFD-D23A-4C56-8A94-D4B9D39083D1}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmaf", "feature\vs2015\vmaf\vmaf.vcxproj", 
"{054010E9-3004-4C24-B0F3-DCCE36D6B436}" - ProjectSection(ProjectDependencies) = postProject - {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} = {03A4C79F-F1A8-48C9-A2AC-0A14EC0F093E} - {5F5103F4-E473-4476-8E7B-FD3465E872B1} = {5F5103F4-E473-4476-8E7B-FD3465E872B1} - EndProjectSection +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vmaf", "feature\msvc\vmaf.vcxproj", "{054010E9-3004-4C24-B0F3-DCCE36D6B436}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/wrapper/libvmaf.vcxproj b/wrapper/libvmaf.vcxproj index 40500db15..1522b118b 100644 --- a/wrapper/libvmaf.vcxproj +++ b/wrapper/libvmaf.vcxproj @@ -11,6 +11,7 @@ + true {5F5103F4-E473-4476-8E7B-FD3465E872B1} Win32Proj libvmaf @@ -42,17 +43,15 @@ - + + - - + ..\feature\src;..\feature\src\common;..\pthreads;..\ptools;..\ptools\opencontainers_1_8_4\include;%(AdditionalIncludeDirectories) Level3 - Disabled - MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) - ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) true true + MULTI_THREADING;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;_DEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) MultiThreadedDebug @@ -61,17 +60,12 @@ + ..\feature\src;..\feature\src\common;..\pthreads;..\ptools;..\ptools\opencontainers_1_8_4\include;%(AdditionalIncludeDirectories) Level3 - - - MaxSpeed - true - true - MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) - ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) - true true + MULTI_THREADING;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;NDEBUG;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) MultiThreaded + 
false Windows @@ -79,9 +73,6 @@ true - - - @@ -92,7 +83,10 @@ - + + AdvancedVectorExtensions + AdvancedVectorExtensions + @@ -119,6 +113,7 @@ + @@ -138,6 +133,7 @@ + @@ -147,6 +143,7 @@ + diff --git a/wrapper/libvmaf.vcxproj.filters b/wrapper/libvmaf.vcxproj.filters index d128d8e5f..05f5670e9 100644 --- a/wrapper/libvmaf.vcxproj.filters +++ b/wrapper/libvmaf.vcxproj.filters @@ -26,9 +26,6 @@ {25a2d365-ffa6-4464-9712-344c46384a31} - - - feature @@ -45,9 +42,6 @@ feature - - feature - feature @@ -126,6 +120,9 @@ Source Files + + feature\common + @@ -140,9 +137,6 @@ feature - - feature - feature @@ -236,5 +230,17 @@ Header Files + + feature\common + + + feature + + + feature + + + Header Files + \ No newline at end of file diff --git a/wrapper/wrapper.vcxproj b/wrapper/vmafossexec.vcxproj similarity index 61% rename from wrapper/wrapper.vcxproj rename to wrapper/vmafossexec.vcxproj index 4f3828896..04a70888b 100644 --- a/wrapper/wrapper.vcxproj +++ b/wrapper/vmafossexec.vcxproj @@ -11,11 +11,11 @@ + true {C2D3FD1E-9068-494D-9655-88CE906B4C8B} Win32Proj - wrapper + vmafossexec 8.1 - vmafossexec @@ -34,7 +34,8 @@ - + + @@ -42,31 +43,18 @@ - - true - static - - - false - static - + + - - + ..\feature\src;..\feature\src\common;%(AdditionalIncludeDirectories) Level3 - Disabled - MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true - ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + MULTI_THREADING;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) MultiThreadedDebug - true Console - true - libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies) - $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) copy examples.bat to output folder @@ -75,26 +63,16 @@ + ..\feature\src;..\feature\src\common;%(AdditionalIncludeDirectories) Level3 - - - MaxSpeed - true - 
true - MULTI_THREADING;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;PTW32_STATIC_LIB;OC_NEW_STYLE_INCLUDES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - ..\feature\src\common;..\ptools;..\ptools\opencontainers_1_8_4\include;..\pthreads;..\feature\src;%(AdditionalIncludeDirectories) + MULTI_THREADING;NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) MultiThreaded - AdvancedVectorExtensions2 - true + false Console true true - true - libvmaf.lib;libpthread-static.lib;%(AdditionalDependencies) - $(SolutionDir)\$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories) copy examples.bat to output folder @@ -105,9 +83,17 @@ + + {03a4c79f-f1a8-48c9-a2ac-0a14ec0f093e} + {3f07b371-1b81-477e-886c-0e079b0a6803} + + {5f5103f4-e473-4476-8e7b-fd3465e872b1} + + + \ No newline at end of file diff --git a/wrapper/vmafossexec.vcxproj.filters b/wrapper/vmafossexec.vcxproj.filters new file mode 100644 index 000000000..7459f10e3 --- /dev/null +++ b/wrapper/vmafossexec.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/wrapper/wrapper.vcxproj.filters b/wrapper/wrapper.vcxproj.filters deleted file mode 100644 index 2dc63c25f..000000000 --- a/wrapper/wrapper.vcxproj.filters +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 012f51cb0667b40f0d5c872b828cf27bf5086266 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Tue, 29 Jan 2019 16:50:40 -0800 Subject: [PATCH 10/29] Optimization of ADM, VIF and buffer management modules (#288) (#289) * Optimization of ADM, VIF and buffer management modules (#288) * Format files; misc. 
--- feature/src/adm.c | 228 +++ feature/src/adm_tools.c | 709 ++++++++ feature/src/adm_tools.h | 124 ++ feature/src/common/blur_array.c | 141 +- feature/src/common/blur_array.h | 26 +- feature/src/common/convolution.c | 1 + feature/src/common/convolution.h | 5 + feature/src/common/convolution_avx.c | 1792 +++++++++++++++++++++ feature/src/common/convolution_internal.h | 60 + feature/src/vif.c | 94 +- feature/src/vif_options.h | 6 + feature/src/vif_tools.c | 203 +++ feature/src/vif_tools.h | 6 + wrapper/Makefile | 10 +- wrapper/src/combo.c | 207 ++- wrapper/src/combo.h | 3 + wrapper/src/darray.c | 16 + wrapper/src/libvmaf.h | 9 + wrapper/src/main.cpp | 64 +- wrapper/src/vmaf.cpp | 7 + 20 files changed, 3676 insertions(+), 35 deletions(-) diff --git a/feature/src/adm.c b/feature/src/adm.c index 7e77ea3fa..1123d710c 100644 --- a/feature/src/adm.c +++ b/feature/src/adm.c @@ -39,6 +39,11 @@ typedef adm_dwt_band_t_s adm_dwt_band_t; #define adm_sum_cube adm_sum_cube_s #define offset_image offset_image_s +#if ADM_OPT_ENABLE + #define adm_csf_den_scale adm_csf_den_scale_s + #define dwt2_src_indices_filt dwt2_src_indices_filt_s +#endif + static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one) { band->band_a = (float *)data_top; data_top += buf_sz_one; @@ -48,6 +53,228 @@ static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_o return data_top; } +#if ADM_OPT_ENABLE +static char *init_dwt_band_hvd(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one) +{ + band->band_a = NULL; + band->band_h = (float *)data_top; data_top += buf_sz_one; + band->band_v = (float *)data_top; data_top += buf_sz_one; + band->band_d = (float *)data_top; data_top += buf_sz_one; + return data_top; +} + +int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores, double border_factor) +{ +#ifdef ADM_OPT_SINGLE_PRECISION + double numden_limit = 
1e-2 * (w * h) / (1920.0 * 1080.0); +#else + double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0); +#endif + float *data_buf = 0; + char *data_top; + + char *ind_buf_y = 0, *buf_y_orig = 0; + char *ind_buf_x = 0, *buf_x_orig = 0; + int *ind_y[4], *ind_x[4]; + + float *ref_scale; + float *dis_scale; + + adm_dwt_band_t ref_dwt2; + adm_dwt_band_t dis_dwt2; + + adm_dwt_band_t decouple_r; + adm_dwt_band_t decouple_a; + + adm_dwt_band_t csf_r; + adm_dwt_band_t csf_a; + const float *curr_ref_scale = ref; + const float *curr_dis_scale = dis; + int curr_ref_stride = ref_stride; + int curr_dis_stride = dis_stride; + + int orig_h = h; + + int buf_stride = ALIGN_CEIL(((w + 1) / 2) * sizeof(float)); + size_t buf_sz_one = (size_t)buf_stride * ((h + 1) / 2); + + int ind_size_y = ALIGN_CEIL(((h + 1) / 2) * sizeof(int)); + int ind_size_x = ALIGN_CEIL(((w + 1) / 2) * sizeof(int)); + + double num = 0; + double den = 0; + + int scale; + int ret = 1; + + // Code optimized to save on multiple buffer copies + // hence the reduction in the number of buffers required from 35 to 17 +#define NUM_BUFS_ADM 17 + if (SIZE_MAX / buf_sz_one < NUM_BUFS_ADM) + { + printf("error: SIZE_MAX / buf_sz_one < NUM_BUFS_ADM, buf_sz_one = %zu.\n", buf_sz_one); + fflush(stdout); + goto fail; + } + + if (!(data_buf = aligned_malloc(buf_sz_one * NUM_BUFS_ADM, MAX_ALIGN))) + { + printf("error: aligned_malloc failed for data_buf.\n"); + fflush(stdout); + goto fail; + } + + data_top = (char *)data_buf; + + data_top = init_dwt_band(&ref_dwt2, data_top, buf_sz_one); + data_top = init_dwt_band(&dis_dwt2, data_top, buf_sz_one); + data_top = init_dwt_band_hvd(&decouple_r, data_top, buf_sz_one); + data_top = init_dwt_band_hvd(&decouple_a, data_top, buf_sz_one); + data_top = init_dwt_band_hvd(&csf_a, data_top, buf_sz_one); + + if (!(buf_y_orig = aligned_malloc(ind_size_y * 4, MAX_ALIGN))) + { + printf("error: aligned_malloc failed for ind_buf_y.\n"); + fflush(stdout); + goto fail; + } + ind_buf_y = buf_y_orig; + 
ind_y[0] = (int*)ind_buf_y; ind_buf_y += ind_size_y; + ind_y[1] = (int*)ind_buf_y; ind_buf_y += ind_size_y; + ind_y[2] = (int*)ind_buf_y; ind_buf_y += ind_size_y; + ind_y[3] = (int*)ind_buf_y; ind_buf_y += ind_size_y; + + if (!(buf_x_orig = aligned_malloc(ind_size_x * 4, MAX_ALIGN))) + { + printf("error: aligned_malloc failed for ind_buf_x.\n"); + fflush(stdout); + goto fail; + } + ind_buf_x = buf_x_orig; + ind_x[0] = (int*)ind_buf_x; ind_buf_x += ind_size_x; + ind_x[1] = (int*)ind_buf_x; ind_buf_x += ind_size_x; + ind_x[2] = (int*)ind_buf_x; ind_buf_x += ind_size_x; + ind_x[3] = (int*)ind_buf_x; ind_buf_x += ind_size_x; + + for (scale = 0; scale < 4; ++scale) { +#ifdef ADM_OPT_DEBUG_DUMP + char pathbuf[256]; +#endif + float num_scale = 0.0; + float den_scale = 0.0; + + dwt2_src_indices_filt(ind_y, ind_x, w, h); + adm_dwt2(curr_ref_scale, &ref_dwt2, ind_y, ind_x, w, h, curr_ref_stride, buf_stride); + adm_dwt2(curr_dis_scale, &dis_dwt2, ind_y, ind_x, w, h, curr_dis_stride, buf_stride); + + w = (w + 1) / 2; + h = (h + 1) / 2; + + adm_decouple(&ref_dwt2, &dis_dwt2, &decouple_r, &decouple_a, w, h, buf_stride, buf_stride, buf_stride, buf_stride, border_factor); + + den_scale = adm_csf_den_scale(&ref_dwt2, orig_h, scale, w, h, buf_stride, border_factor); + + adm_csf(&decouple_a, &csf_a, orig_h, scale, w, h, buf_stride, buf_stride, border_factor); + + num_scale = adm_cm(&decouple_r, NULL, &csf_a, w, h, buf_stride, buf_stride, buf_stride, border_factor, scale); + +#ifdef ADM_OPT_DEBUG_DUMP + sprintf(pathbuf, "stage/ref[%d]_a.yuv", scale); + write_image(pathbuf, ref_dwt2.band_a, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/ref[%d]_h.yuv", scale); + write_image(pathbuf, ref_dwt2.band_h, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/ref[%d]_v.yuv", scale); + write_image(pathbuf, ref_dwt2.band_v, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/ref[%d]_d.yuv", scale); + write_image(pathbuf, ref_dwt2.band_d, w, h, buf_stride, 
sizeof(float)); + + sprintf(pathbuf, "stage/dis[%d]_a.yuv", scale); + write_image(pathbuf, dis_dwt2.band_a, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/dis[%d]_h.yuv", scale); + write_image(pathbuf, dis_dwt2.band_h, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/dis[%d]_v.yuv", scale); + write_image(pathbuf, dis_dwt2.band_v, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/dis[%d]_d.yuv", scale); + write_image(pathbuf, dis_dwt2.band_d, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/r[%d]_h.yuv", scale); + write_image(pathbuf, decouple_r.band_h, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/r[%d]_v.yuv", scale); + write_image(pathbuf, decouple_r.band_v, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/r[%d]_d.yuv", scale); + write_image(pathbuf, decouple_r.band_d, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/a[%d]_h.yuv", scale); + write_image(pathbuf, decouple_a.band_h, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/a[%d]_v.yuv", scale); + write_image(pathbuf, decouple_a.band_v, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/a[%d]_d.yuv", scale); + write_image(pathbuf, decouple_a.band_d, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/csf_a[%d]_h.yuv", scale); + write_image(pathbuf, csf_a.band_h, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/csf_a[%d]_v.yuv", scale); + write_image(pathbuf, csf_a.band_v, w, h, buf_stride, sizeof(float)); + + sprintf(pathbuf, "stage/csf_a[%d]_d.yuv", scale); + write_image(pathbuf, csf_a.band_d, w, h, buf_stride, sizeof(float)); + +#endif + + num += num_scale; + den += den_scale; + + ref_scale = ref_dwt2.band_a; + dis_scale = dis_dwt2.band_a; + + curr_ref_scale = ref_scale; + curr_dis_scale = dis_scale; + + curr_ref_stride = buf_stride; + curr_dis_stride = buf_stride; + +#ifdef ADM_OPT_DEBUG_DUMP + PRINTF("num: %f\n", num); + PRINTF("den: %f\n", den); +#endif + scores[2 
* scale + 0] = num_scale; + scores[2 * scale + 1] = den_scale; + } + + num = num < numden_limit ? 0 : num; + den = den < numden_limit ? 0 : den; + + if (den == 0.0) + { + *score = 1.0f; + } + else + { + *score = num / den; + } + *score_num = num; + *score_den = den; + + ret = 0; + +fail: + aligned_free(data_buf); + aligned_free(buf_y_orig); + aligned_free(buf_x_orig); + return ret; +} +#else // ADM_OPT_ENABLE int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores, double border_factor) { #ifdef ADM_OPT_SINGLE_PRECISION @@ -273,6 +500,7 @@ int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride aligned_free(data_buf); return ret; } +#endif int adm(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, int w, int h, const char *fmt) { diff --git a/feature/src/adm_tools.c b/feature/src/adm_tools.c index 94c9116c2..5cb57aecd 100644 --- a/feature/src/adm_tools.c +++ b/feature/src/adm_tools.c @@ -48,6 +48,23 @@ static float rcp_s(float x) static const float dwt2_db2_coeffs_lo_s[4] = { 0.482962913144690, 0.836516303737469, 0.224143868041857, -0.129409522550921 }; static const float dwt2_db2_coeffs_hi_s[4] = { -0.129409522550921, -0.224143868041857, 0.836516303737469, -0.482962913144690 }; +#if ADM_OPT_ENABLE +#ifndef FLOAT_ONE_BY_30 +#define FLOAT_ONE_BY_30 0.0333333351 +#endif + +#ifndef FLOAT_ONE_BY_15 +#define FLOAT_ONE_BY_15 0.0666666701 +#endif + +static const float fcoeff_cm_thresh_s[3][3] = +{ + { FLOAT_ONE_BY_30, FLOAT_ONE_BY_30, FLOAT_ONE_BY_30 }, + { FLOAT_ONE_BY_30, FLOAT_ONE_BY_15, FLOAT_ONE_BY_30 }, + { FLOAT_ONE_BY_30, FLOAT_ONE_BY_30, FLOAT_ONE_BY_30 } +}; +#endif + float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor) { int px_stride = stride / sizeof(float); @@ -76,6 +93,124 @@ float adm_sum_cube_s(const float *x, int w, int h, 
int stride, double border_fac return powf(accum, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); } +#if ADM_OPT_ENABLE +void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor) +{ +#ifdef ADM_OPT_AVOID_ATAN + const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0); +#endif + const float eps = 1e-30; + + int ref_px_stride = ref_stride / sizeof(float); + int dis_px_stride = dis_stride / sizeof(float); + int r_px_stride = r_stride / sizeof(float); + int a_px_stride = a_stride / sizeof(float); + + /* The computation of the score is not required for the regions which lie outside the frame borders */ + int left = w * border_factor - 0.5 - 1; // -1 for filter tap + int top = h * border_factor - 0.5 - 1; + int right = w - left + 2; // +2 for filter tap + int bottom = h - top + 2; + + if (left < 0) { + left = 0; + } + if (right > w) { + right = w; + } + if (top < 0) { + top = 0; + } + if (bottom > h) { + bottom = h; + } + + float oh, ov, od, th, tv, td; + float kh, kv, kd, tmph, tmpv, tmpd; +#ifdef ADM_OPT_AVOID_ATAN + float ot_dp, o_mag_sq, t_mag_sq; +#else + float oa, ta, diff; +#endif + int angle_flag; + int i, j; + + for (i = top; i < bottom; ++i) { + for (j = left; j < right; ++j) { + oh = ref->band_h[i * ref_px_stride + j]; + ov = ref->band_v[i * ref_px_stride + j]; + od = ref->band_d[i * ref_px_stride + j]; + th = dis->band_h[i * dis_px_stride + j]; + tv = dis->band_v[i * dis_px_stride + j]; + td = dis->band_d[i * dis_px_stride + j]; + + kh = DIVS(th, oh + eps); + kv = DIVS(tv, ov + eps); + kd = DIVS(td, od + eps); + + kh = kh < 0.0f ? 0.0f : (kh > 1.0f ? 1.0f : kh); + kv = kv < 0.0f ? 0.0f : (kv > 1.0f ? 1.0f : kv); + kd = kd < 0.0f ? 0.0f : (kd > 1.0f ? 
1.0f : kd); + + tmph = kh * oh; + tmpv = kv * ov; + tmpd = kd * od; +#ifdef ADM_OPT_AVOID_ATAN + /* Determine if angle between (oh,ov) and (th,tv) is less than 1 degree. + * Given that u is the angle (oh,ov) and v is the angle (th,tv), this can + * be done by testing the inequvality. + * + * { (u.v.) >= 0 } AND { (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 } + * + * Proof: + * + * cos(theta) = (u.v) / (||u|| * ||v||) + * + * IF u.v >= 0 THEN + * cos(theta)^2 = (u.v)^2 / (||u||^2 * ||v||^2) + * (u.v)^2 = cos(theta)^2 * ||u||^2 * ||v||^2 + * + * IF |theta| < 1deg THEN + * (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 + * END + * ELSE + * |theta| > 90deg + * END + */ + ot_dp = oh * th + ov * tv; + o_mag_sq = oh * oh + ov * ov; + t_mag_sq = th * th + tv * tv; + + angle_flag = (ot_dp >= 0.0f) && (ot_dp * ot_dp >= cos_1deg_sq * o_mag_sq * t_mag_sq); +#else + oa = atanf(DIVS(ov, oh + eps)); + ta = atanf(DIVS(tv, th + eps)); + + if (oh < 0.0f) + oa += (float)M_PI; + if (th < 0.0f) + ta += (float)M_PI; + + diff = fabsf(oa - ta) * 180.0f / M_PI; + angle_flag = diff < 1.0f; +#endif + if (angle_flag) { + tmph = th; + tmpv = tv; + tmpd = td; + } + + r->band_h[i * r_px_stride + j] = tmph; + r->band_v[i * r_px_stride + j] = tmpv; + r->band_d[i * r_px_stride + j] = tmpd; + + a->band_h[i * a_px_stride + j] = th - tmph; + a->band_v[i * a_px_stride + j] = tv - tmpv; + a->band_d[i * a_px_stride + j] = td - tmpd; + } + } +} +#else void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride) { #ifdef ADM_OPT_AVOID_ATAN @@ -173,7 +308,59 @@ void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, co } } } +#endif +#if ADM_OPT_ENABLE +void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride, double border_factor) +{ + const float *src_angles[3] = 
{ src->band_h, src->band_v, src->band_d }; + float *dst_angles[3] = { dst->band_h, dst->band_v, dst->band_d }; + + const float *src_ptr; + float *dst_ptr; + + int src_px_stride = src_stride / sizeof(float); + int dst_px_stride = dst_stride / sizeof(float); + + // for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from + // 1 to 4 (from finest scale to coarsest scale). + float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1); + float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2); + float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 }; + + /* The computation of the csf values is not required for the regions which lie outside the frame borders */ + int left = w * border_factor - 0.5 - 1; // -1 for filter tap + int top = h * border_factor - 0.5 - 1; + int right = w - left + 2; // +2 for filter tap + int bottom = h - top + 2; + + if (left < 0) { + left = 0; + } + if (right > w) { + right = w; + } + if (top < 0) { + top = 0; + } + if (bottom > h) { + bottom = h; + } + + int i, j, theta; + + for (theta = 0; theta < 3; ++theta) { + src_ptr = src_angles[theta]; + dst_ptr = dst_angles[theta]; + + for (i = top; i < bottom; ++i) { + for (j = left; j < right; ++j) { + dst_ptr[i * dst_px_stride + j] = rfactor[theta] * src_ptr[i * src_px_stride + j]; + } + } + } +} +#else void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride) { const float *src_angles[3] = { src->band_h, src->band_v, src->band_d }; @@ -204,7 +391,137 @@ void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int ori } } } +#endif +#if ADM_OPT_ENABLE +/* Combination of adm_csf_s and adm_sum_cube_s for csf_o based den_scale */ +float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, int w, int h, int src_stride, double border_factor) +{ + float *src_h = src->band_h, *src_v = src->band_v, *src_d = src->band_d; + + int 
src_px_stride = src_stride / sizeof(float); + + // for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from + // 1 to 4 (from finest scale to coarsest scale). + float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1); + float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2); + float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 }; + + float accum_h = 0, accum_v = 0, accum_d = 0; + float accum_inner_h, accum_inner_v, accum_inner_d; + float den_scale_h, den_scale_v, den_scale_d; + + float val; + + /* The computation of the denominator scales is not required for the regions which lie outside the frame borders */ + int left = w * border_factor - 0.5; + int top = h * border_factor - 0.5; + int right = w - left; + int bottom = h - top; + + int i, j; + + for (i = top; i < bottom; ++i) { + accum_inner_h = 0; + accum_inner_v = 0; + accum_inner_d = 0; + src_h = src->band_h + i * src_px_stride; + src_v = src->band_v + i * src_px_stride; + src_d = src->band_d + i * src_px_stride; + for (j = left; j < right; ++j) { + float abs_csf_o_val_h = fabsf(rfactor[0] * src_h[j]); + float abs_csf_o_val_v = fabsf(rfactor[1] * src_v[j]); + float abs_csf_o_val_d = fabsf(rfactor[2] * src_d[j]); + + val = abs_csf_o_val_h * abs_csf_o_val_h * abs_csf_o_val_h; + accum_inner_h += val; + val = abs_csf_o_val_v * abs_csf_o_val_v * abs_csf_o_val_v; + accum_inner_v += val; + val = abs_csf_o_val_d * abs_csf_o_val_d * abs_csf_o_val_d; + accum_inner_d += val; + } + + accum_h += accum_inner_h; + accum_v += accum_inner_v; + accum_d += accum_inner_d; + + } + + den_scale_h = powf(accum_h, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); + den_scale_v = powf(accum_v, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); + den_scale_d = powf(accum_d, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); + + return(den_scale_h + den_scale_v + den_scale_d); + +} +#endif + +#if 
ADM_OPT_ENABLE +void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride) +{ + const float *angles[3] = { src->band_h, src->band_v, src->band_d }; + const float *src_ptr; + + int src_px_stride = src_stride / sizeof(float); + int dst_px_stride = dst_stride / sizeof(float); + + float fcoeff, imgcoeff; + + int theta, i, j, fi, fj, ii, jj; + + /* i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 */ + { + float accum; + ADM_CM_THRESH_S_0_0(angles, src_px_stride, &accum, w, h, 0, 0) + dst[0] = accum; + } + + /* i = 0, j = w-1: indices y: 1,0,1, x: w-2, w-1, w-1 */ + { + float accum; + ADM_CM_THRESH_S_0_W_M_1(angles, src_px_stride, &accum, w, h, 0, (w - 1)) + dst[w - 1] = accum; + } + + /* i = 0, j = 1, ..., w-2: indices y: 1,0,1, x: j-1,j,j+1 */ + for (j = 1; j < (w - 1); ++j) { + float accum; + ADM_CM_THRESH_S_0_J(angles, src_px_stride, &accum, w, h, 0, j) + dst[j] = accum; + } + + /* i = h-1, j = 0: indices y: h-2,h-1,h-1, x: 1,0,1 */ + { + float accum; + ADM_CM_THRESH_S_H_M_1_0(angles, src_px_stride, &accum, w, h, (h - 1), 0) + dst[dst_px_stride*(h - 1)] = accum; + } + + /* i = h-1, j = w-1: indices y: h-2,h-1,h-1, x: w-2, w-1, w-1 */ + { + float accum; + ADM_CM_THRESH_S_H_M_1_W_M_1(angles, src_px_stride, &accum, w, h, (h - 1), (w - 1)) + dst[dst_px_stride*(h - 1) + w - 1] = accum; + } + + /* i = h-1, j = 1, ..., w-2: indices y: h-2,h-1,h-1, x: j-1,j,j+1 */ + for (j = 1; j < (w - 1); ++j) { + float accum; + ADM_CM_THRESH_S_H_M_1_J(angles, src_px_stride, &accum, w, h, (h - 1), j) + dst[dst_px_stride*(h - 1) + j] = accum; + } + + /* i = 1,..,h-2, j = 1,..,w-2: indices y: i-1,i,i+1, x: j-1,j,j+1 */ + for (i = 1; i < (h - 1); ++i) { + for (j = 1; j < (w - 1); ++j) { + float accum; + ADM_CM_THRESH_S_I_J(angles, src_px_stride, &accum, w, h, i, j) + dst[dst_px_stride*i + j] = accum; + } + } + +} +#else void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride) { const float *angles[3] = 
{ src->band_h, src->band_v, src->band_d }; @@ -257,7 +574,251 @@ void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int } } } +#endif +#if ADM_OPT_ENABLE +float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const adm_dwt_band_t_s *csf_a, int w, int h, int src_stride, int dst_stride, int csf_a_stride, double border_factor, int scale) +{ + /* Take decouple_r as src and do dsf_s on decouple_r here to get csf_r */ + float *src_h = src->band_h, *src_v = src->band_v, *src_d = src->band_d; + + // for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from + // 1 to 4 (from finest scale to coarsest scale). + float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1); + float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2); + float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 }; + + const float *angles[3] = { csf_a->band_h, csf_a->band_v, csf_a->band_d }; + + int src_px_stride = src_stride / sizeof(float); + int dst_px_stride = dst_stride / sizeof(float); + int csf_px_stride = csf_a_stride / sizeof(float); + + float xh, xv, xd, thr; + + float val; + float accum_h = 0, accum_v = 0, accum_d = 0; + float accum_inner_h, accum_inner_v, accum_inner_d; + float num_scale_h, num_scale_v, num_scale_d; + + /* The computation of the scales is not required for the regions which lie outside the frame borders */ + int left = w * border_factor - 0.5; + int top = h * border_factor - 0.5; + int right = w - left; + int bottom = h - top; + + int start_col = (left > 1) ? left : 1; + int end_col = (right < (w - 1)) ? right : (w - 1); + int start_row = (top > 1) ? top : 1; + int end_row = (bottom < (h - 1)) ? 
bottom : (h - 1); + + int i, j; + + /* i=0,j=0 */ + accum_inner_h = 0; + accum_inner_v = 0; + accum_inner_d = 0; + if ((top <= 0) && (left <= 0)) + { + xh = src->band_h[0] * rfactor[0]; + xv = src->band_v[0] * rfactor[1]; + xd = src->band_d[0] * rfactor[2]; + ADM_CM_THRESH_S_0_0(angles, csf_px_stride, &thr, w, h, 0, 0); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + + /* i=0,j=w-1 */ + if ((top <= 0) && (right > (w - 1))) + { + xh = src->band_h[w - 1] * rfactor[0]; + xv = src->band_v[w - 1] * rfactor[1]; + xd = src->band_d[w - 1] * rfactor[2]; + ADM_CM_THRESH_S_0_W_M_1(angles, csf_px_stride, &thr, w, h, 0, (w - 1)); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + + /* i=0, j */ + if (top <= 0) { + for (j = start_col; j < end_col; ++j) { + xh = src->band_h[j] * rfactor[0]; + xv = src->band_v[j] * rfactor[1]; + xd = src->band_d[j] * rfactor[2]; + ADM_CM_THRESH_S_0_J(angles, csf_px_stride, &thr, w, h, 0, j); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 
0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + } + + accum_h += accum_inner_h; + accum_v += accum_inner_v; + accum_d += accum_inner_d; + + accum_inner_h = 0; + accum_inner_v = 0; + accum_inner_d = 0; + + /* i=h-1,j=0 */ + if ((bottom > (h - 1)) && (left <= 0)) + { + xh = src->band_h[(h - 1) * src_px_stride] * rfactor[0]; + xv = src->band_v[(h - 1) * src_px_stride] * rfactor[1]; + xd = src->band_d[(h - 1) * src_px_stride] * rfactor[2]; + ADM_CM_THRESH_S_H_M_1_0(angles, csf_px_stride, &thr, w, h, (h - 1), 0); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + + /* i-h-1,j=w-1 */ + if ((bottom > (h - 1)) && (right > (w - 1))) + { + xh = src->band_h[(h - 1) * src_px_stride + w - 1] * rfactor[0]; + xv = src->band_v[(h - 1) * src_px_stride + w - 1] * rfactor[1]; + xd = src->band_d[(h - 1) * src_px_stride + w - 1] * rfactor[2]; + ADM_CM_THRESH_S_H_M_1_W_M_1(angles, csf_px_stride, &thr, w, h, (h - 1), (w - 1)); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 
0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + + /* i=h-1,j */ + if (bottom > (h - 1)) { + for (j = start_col; j < end_col; ++j) { + xh = src->band_h[(h - 1) * src_px_stride + j] * rfactor[0]; + xv = src->band_v[(h - 1) * src_px_stride + j] * rfactor[1]; + xd = src->band_d[(h - 1) * src_px_stride + j] * rfactor[2]; + ADM_CM_THRESH_S_H_M_1_J(angles, csf_px_stride, &thr, w, h, (h - 1), j); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + } + + accum_h += accum_inner_h; + accum_v += accum_inner_v; + accum_d += accum_inner_d; + + for (i = start_row; i < end_row; ++i) { + accum_inner_h = 0; + accum_inner_v = 0; + accum_inner_d = 0; + for (j = start_col; j < end_col; ++j) { + xh = src->band_h[i * src_px_stride + j] * rfactor[0]; + xv = src->band_v[i * src_px_stride + j] * rfactor[1]; + xd = src->band_d[i * src_px_stride + j] * rfactor[2]; + ADM_CM_THRESH_S_I_J(angles, csf_px_stride, &thr, w, h, i, j); + + xh = fabsf(xh) - thr; + xv = fabsf(xv) - thr; + xd = fabsf(xd) - thr; + + xh = xh < 0.0f ? 0.0f : xh; + xv = xv < 0.0f ? 0.0f : xv; + xd = xd < 0.0f ? 
0.0f : xd; + + val = (xh * xh * xh); + accum_inner_h += val; + val = (xv * xv * xv); + accum_inner_v += val; + val = (xd * xd * xd); + accum_inner_d += val; + + } + accum_h += accum_inner_h; + accum_v += accum_inner_v; + accum_d += accum_inner_d; + } + + num_scale_h = powf(accum_h, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); + num_scale_v = powf(accum_v, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); + num_scale_d = powf(accum_d, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); + + return (num_scale_h + num_scale_v + num_scale_d); +} +#else void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const float *thresh, int w, int h, int src_stride, int dst_stride, int thresh_stride) { int src_px_stride = src_stride / sizeof(float); @@ -289,7 +850,154 @@ void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const fl } } } +#endif + +#if ADM_OPT_ENABLE +// This function stores the imgcoeff values used in adm_dwt2_s in buffers, which reduces the control code cycles. +void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h) +{ + int i, j; + int ind0, ind1, ind2, ind3; + /* Vertical pass */ + for (i = 0; i < (h + 1) / 2; ++i) { /* Index = 2 * i - 1 + fi */ + ind0 = 2 * i - 1; + ind0 = (ind0 < 0) ? -ind0 : ((ind0 >= h) ? (2 * h - ind0 - 1) : ind0); + ind1 = 2 * i; + if (ind1 >= h) { + ind1 = (2 * h - ind1 - 1); + } + ind2 = 2 * i + 1; + if (ind2 >= h) { + ind2 = (2 * h - ind2 - 1); + } + ind3 = 2 * i + 2; + if (ind3 >= h) { + ind3 = (2 * h - ind3 - 1); + } + src_ind_y[0][i] = ind0; + src_ind_y[1][i] = ind1; + src_ind_y[2][i] = ind2; + src_ind_y[3][i] = ind3; + } + /* Horizontal pass */ + for (j = 0; j < (w + 1) / 2; ++j) { /* Index = 2 * j - 1 + fj */ + ind0 = 2 * j - 1; + ind0 = (ind0 < 0) ? -ind0 : ((ind0 >= w) ? 
(2 * w - ind0 - 1) : ind0); + ind1 = 2 * j; + if (ind1 >= w) { + ind1 = (2 * w - ind1 - 1); + } + ind2 = 2 * j + 1; + if (ind2 >= w) { + ind2 = (2 * w - ind2 - 1); + } + ind3 = 2 * j + 2; + if (ind3 >= w) { + ind3 = (2 * w - ind3 - 1); + } + src_ind_x[0][j] = ind0; + src_ind_x[1][j] = ind1; + src_ind_x[2][j] = ind2; + src_ind_x[3][j] = ind3; + } +} +#endif + +#if ADM_OPT_ENABLE +void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride) +{ + const float *filter_lo = dwt2_db2_coeffs_lo_s; + const float *filter_hi = dwt2_db2_coeffs_hi_s; + int fwidth = sizeof(dwt2_db2_coeffs_lo_s) / sizeof(float); + + int src_px_stride = src_stride / sizeof(float); + int dst_px_stride = dst_stride / sizeof(float); + + float *tmplo = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN); + float *tmphi = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN); + float fcoeff_lo, fcoeff_hi, imgcoeff; + float s0, s1, s2, s3; + float accum; + + int i, j, fi, fj, ii, jj; + int j0, j1, j2, j3; + + for (i = 0; i < (h + 1) / 2; ++i) { + /* Vertical pass. */ + for (j = 0; j < w; ++j) { + s0 = src[ind_y[0][i] * src_px_stride + j]; + s1 = src[ind_y[1][i] * src_px_stride + j]; + s2 = src[ind_y[2][i] * src_px_stride + j]; + s3 = src[ind_y[3][i] * src_px_stride + j]; + + + accum = 0; + accum += filter_lo[0] * s0; + accum += filter_lo[1] * s1; + accum += filter_lo[2] * s2; + accum += filter_lo[3] * s3; + tmplo[j] = accum; + + accum = 0; + accum += filter_hi[0] * s0; + accum += filter_hi[1] * s1; + accum += filter_hi[2] * s2; + accum += filter_hi[3] * s3; + tmphi[j] = accum; + } + + /* Horizontal pass (lo and hi). 
*/ + for (j = 0; j < (w + 1) / 2; ++j) { + + j0 = ind_x[0][j]; + j1 = ind_x[1][j]; + j2 = ind_x[2][j]; + j3 = ind_x[3][j]; + s0 = tmplo[j0]; + s1 = tmplo[j1]; + s2 = tmplo[j2]; + s3 = tmplo[j3]; + + accum = 0; + accum += filter_lo[0] * s0; + accum += filter_lo[1] * s1; + accum += filter_lo[2] * s2; + accum += filter_lo[3] * s3; + dst->band_a[i * dst_px_stride + j] = accum; + + accum = 0; + accum += filter_hi[0] * s0; + accum += filter_hi[1] * s1; + accum += filter_hi[2] * s2; + accum += filter_hi[3] * s3; + dst->band_v[i * dst_px_stride + j] = accum; + s0 = tmphi[j0]; + s1 = tmphi[j1]; + s2 = tmphi[j2]; + s3 = tmphi[j3]; + + accum = 0; + accum += filter_lo[0] * s0; + accum += filter_lo[1] * s1; + accum += filter_lo[2] * s2; + accum += filter_lo[3] * s3; + dst->band_h[i * dst_px_stride + j] = accum; + + accum = 0; + accum += filter_hi[0] * s0; + accum += filter_hi[1] * s1; + accum += filter_hi[2] * s2; + accum += filter_hi[3] * s3; + dst->band_d[i * dst_px_stride + j] = accum; + + } + } + + aligned_free(tmplo); + aligned_free(tmphi); +} +#else void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int src_stride, int dst_stride) { const float *filter_lo = dwt2_db2_coeffs_lo_s; @@ -391,6 +1099,7 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int aligned_free(tmplo); aligned_free(tmphi); } +#endif void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride) { diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h index 45d2868d0..a9c93f591 100644 --- a/feature/src/adm_tools.h +++ b/feature/src/adm_tools.h @@ -25,6 +25,108 @@ #ifndef ADM_TOOLS_H_ #define ADM_TOOLS_H_ +#define ADM_OPT_ENABLE 1 + +#if ADM_OPT_ENABLE +// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 +#define ADM_CM_THRESH_S_0_0(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + *accum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + float row1 = FLOAT_ONE_BY_30 * (2 * 
fabsf(src_ptr[src_px_stride + 1]) + fabsf(src_ptr[src_px_stride])); \ + float row2 = FLOAT_ONE_BY_30 * 2 * fabsf(src_ptr[1]) + FLOAT_ONE_BY_15 * fabsf(src_ptr[0]); \ + float row3 = row1; \ + *accum += (row1 + row2 + row3); \ + } \ +} + +// i = 0, j = w-1: indices y: 1,0,1, x: w-2, w-1, w-1 +#define ADM_CM_THRESH_S_0_W_M_1(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + *accum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[src_px_stride + w - 1]) + fabsf(src_ptr[src_px_stride + w - 2])); \ + float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[w - 2]) + fabsf(src_ptr[w - 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[w - 1]); \ + float row3 = row1; \ + *accum += (row1 + row2 + row3); \ + } \ +} + +// i = 0, j = 1, ..., w-2: indices y: 1,0,1, x: j-1,j,j+1 +#define ADM_CM_THRESH_S_0_J(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + *accum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + float row1 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + j - 1]) + fabsf(src_ptr[src_px_stride + j]) + fabsf(src_ptr[src_px_stride + j + 1])); \ + float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j + 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[j]); \ + float row3 = row1; \ + *accum += (row1 + row2 + row3); \ + } \ +} + +// i = h-1, j = 0: indices y: h-2,h-1,h-1, x: 1,0,1 +#define ADM_CM_THRESH_S_H_M_1_0(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + *accum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + src_ptr += (src_px_stride * (h - 2)); \ + float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[1]) + fabsf(src_ptr[0])); \ + float row2 = FLOAT_ONE_BY_30 * 2 * fabsf(src_ptr[src_px_stride + 1]) + FLOAT_ONE_BY_15 * fabsf(src_ptr[src_px_stride]); \ + float row3 = row2; \ + *accum += (row1 + row2 + row3); \ + } \ +} + +// i = h-1, j = w-1: indices y: h-2,h-1,h-1, x: w-2, w-1, w-1 +#define 
ADM_CM_THRESH_S_H_M_1_W_M_1(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + *accum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + src_ptr += (src_px_stride * (h - 2)); \ + float row1 = FLOAT_ONE_BY_30 * (2 * fabsf(src_ptr[w - 1]) + fabsf(src_ptr[w - 2])); \ + float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + w - 2]) + fabsf(src_ptr[src_px_stride + w - 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[src_px_stride + w - 1]); \ + float row3 = row2; \ + *accum += (row1 + row2 + row3); \ + } \ +} + +// i = h-1, j = 1, ..., w-2: indices y: h-2,h-1,h-1, x: j-1,j,j+1 +#define ADM_CM_THRESH_S_H_M_1_J(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + *accum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + src_ptr += (src_px_stride * (h - 2)); \ + float row1 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j]) + fabsf(src_ptr[j + 1])); \ + float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[src_px_stride + j - 1]) + fabsf(src_ptr[src_px_stride + j + 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[src_px_stride + j]); \ + float row3 = row2; \ + *accum += (row1 + row2 + row3); \ + } \ +} + +// i = 1,..,h-2, j = 1,..,w-2: indices y: i-1,i,i+1, x: j-1,j,j+1 +#define ADM_CM_THRESH_S_I_J(angles,src_px_stride,accum,w,h,i,j) \ +{ \ + float sum = 0; \ + for (int theta = 0; theta < 3; ++theta) { \ + float *src_ptr = angles[theta]; \ + src_ptr += (src_px_stride * (i-1)); \ + float row1 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j]) + fabsf(src_ptr[j + 1])); \ + src_ptr += src_px_stride; \ + float row2 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j + 1])) + FLOAT_ONE_BY_15 * fabsf(src_ptr[j]); \ + src_ptr += src_px_stride; \ + float row3 = FLOAT_ONE_BY_30 * (fabsf(src_ptr[j - 1]) + fabsf(src_ptr[j]) + fabsf(src_ptr[j + 1])); \ + sum += (row1 + row2 + row3); \ + } \ + *accum = sum; \ +} +#endif + typedef struct adm_dwt_band_t_s { float *band_a; /* Low-pass V + low-pass H. 
*/ float *band_v; /* Low-pass V + high-pass H. */ @@ -32,6 +134,26 @@ typedef struct adm_dwt_band_t_s { float *band_d; /* High-pass V + high-pass H. */ } adm_dwt_band_t_s; +#if ADM_OPT_ENABLE + +float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor); + +void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor); + +void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride, double border_factor); + +void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride); + +float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, int w, int h, int src_stride, double border_factor); + +float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const adm_dwt_band_t_s *csf_a, int w, int h, int src_stride, int dst_stride, int csf_a_stride, double border_factor, int scale); + +void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h); + +void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride); + +#else + float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor); void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride); @@ -46,6 +168,8 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride); +#endif + /* ================= */ /* Noise floor model */ /* ================= */ diff --git a/feature/src/common/blur_array.c 
b/feature/src/common/blur_array.c index 6352e82bc..0714e1a34 100644 --- a/feature/src/common/blur_array.c +++ b/feature/src/common/blur_array.c @@ -21,7 +21,9 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a { arr->blur_buf_array[i].frame_idx = -1; arr->blur_buf_array[i].blur_buf = aligned_malloc(size, alignement); - +#if BUF_OPT_ENABLE + arr->blur_buf_array[i].reference_count = 0; +#endif if (arr->blur_buf_array[i].blur_buf == 0) return 0; @@ -39,6 +41,32 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a */ float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) { +#if BUF_OPT_ENABLE + int array_length = arr->actual_length; + BLUR_BUF_STRUCT* s = arr->blur_buf_array; + float *ret = NULL; + + pthread_mutex_lock(&arr->block); + + for (int i = 0; i < array_length; i++) + { + if (s->frame_idx == search_frame_idx) + { + /* Increment reference counter */ + s->reference_count++; + + ret = s->blur_buf; + break; + } + + // next array item + s++; + } + + pthread_mutex_unlock(&arr->block); + + return ret; +#else // find item for the search_frame_idx while (1) { @@ -61,7 +89,7 @@ float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) pthread_mutex_unlock(&arr->block); } - +#endif return 0; } @@ -99,7 +127,11 @@ int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf) /* * resets the slot in the array to -1 to indicate that the buffer can be used again */ +#if BUF_OPT_ENABLE +int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx) +#else int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) +#endif { int ret = 0; int array_length = arr->actual_length; @@ -111,8 +143,20 @@ int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) { if (s->frame_idx == search_frame_idx) { +#if BUF_OPT_ENABLE + if(s->reference_count <= 0) + { + s->frame_idx = -1; + ret = 1; + } + else + { + ret = -1; + } +#else s->frame_idx = -1; ret = 1; +#endif break; } @@ -144,3 +188,96 
@@ void free_blur_buf(BLUR_BUF_ARRAY* arr) pthread_mutex_destroy(&arr->block); } + +#if BUF_OPT_ENABLE +/* + * finds a free slot in the array, assigns the new frame index and returns the free buffer pointer + * This increases the reference count for this slot + */ +float* get_free_blur_buf_slot(BLUR_BUF_ARRAY* arr, int frame_idx) +{ + int array_length = arr->actual_length; + BLUR_BUF_STRUCT* s = arr->blur_buf_array; + float *ret = NULL; + pthread_mutex_lock(&arr->block); + + for (int i = 0; i < array_length; i++) + { + if (s->frame_idx == -1) + { + s->frame_idx = frame_idx; + + /* Increment reference counter */ + s->reference_count++; + + ret = s->blur_buf; + break; + } + + // next array item + s++; + } + pthread_mutex_unlock(&arr->block); + + return ret; +} + +/* + * Returns the reference counter for the frame index if found, -1 otherwise +*/ +int get_blur_buf_reference_count(BLUR_BUF_ARRAY* arr, int frame_idx) +{ + int array_length = arr->actual_length; + BLUR_BUF_STRUCT* s = arr->blur_buf_array; + int ret = -1; + + pthread_mutex_lock(&arr->block); + + for (int i = 0; i < array_length; i++) + { + if (s->frame_idx == frame_idx) + { + ret = s->reference_count; + break; + } + + // next array item + s++; + } + + pthread_mutex_unlock(&arr->block); + + return ret; +} + +/* + * releases the reference for the slot in the array which matches the search frame index + */ +int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx) +{ + int ret = -1; + int array_length = arr->actual_length; + BLUR_BUF_STRUCT* s = arr->blur_buf_array; + + pthread_mutex_lock(&arr->block); + + for (int i = 0; i < array_length; i++) + { + if (s->frame_idx == search_frame_idx) + { + s->reference_count--; + ret = 0; + break; + } + + // next struct + s++; + } + + pthread_mutex_unlock(&arr->block); + + return ret; +} + + +#endif diff --git a/feature/src/common/blur_array.h b/feature/src/common/blur_array.h index 6ca7a27d9..1209a590f 100644 --- a/feature/src/common/blur_array.h +++ 
b/feature/src/common/blur_array.h @@ -12,11 +12,19 @@ #include "pthread.h" #include "alloc.h" +#ifdef MULTI_THREADING +#define BUF_OPT_ENABLE 1 +#else +#define BUF_OPT_ENABLE 0 +#endif + #define MAX_NUM_THREADS 128 typedef struct { int frame_idx; float *blur_buf; + int reference_count; + } BLUR_BUF_STRUCT; @@ -31,12 +39,26 @@ typedef struct int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t alignement); -float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx); +#if BUF_OPT_ENABLE -int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf); +float* get_free_blur_buf_slot(BLUR_BUF_ARRAY* arr, int frame_idx); + +int get_blur_buf_reference_count(BLUR_BUF_ARRAY* arr, int frame_idx); + +int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx); + +int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx); + +#else int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx); +#endif + +float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx); + +int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf); + void free_blur_buf(BLUR_BUF_ARRAY* arr); #endif /* VMAF_FEATURE_SRC_BLUR_ARRAY_H_ */ diff --git a/feature/src/common/convolution.c b/feature/src/common/convolution.c index e8691cb6d..dcea7b4ea 100644 --- a/feature/src/common/convolution.c +++ b/feature/src/common/convolution.c @@ -17,6 +17,7 @@ */ #include "alignment.h" +#include "vif_options.h" #include "convolution.h" #include "convolution_internal.h" #include "cpu.h" diff --git a/feature/src/common/convolution.h b/feature/src/common/convolution.h index 396077d16..df7990018 100644 --- a/feature/src/common/convolution.h +++ b/feature/src/common/convolution.h @@ -37,4 +37,9 @@ void convolution_f32_c_s(const float *filter, int filter_width, const float *src void convolution_f32_avx_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride); +#if 
VIF_OPT_ENABLE +void convolution_f32_avx_sq_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride); + +void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float *src1, const float *src2, float *dst, float *tmp, int width, int height, int src1_stride, int src2_stride, int dst_stride); +#endif #endif // CONVOLUTION_H_ diff --git a/feature/src/common/convolution_avx.c b/feature/src/common/convolution_avx.c index c3251c436..44d22c504 100644 --- a/feature/src/common/convolution_avx.c +++ b/feature/src/common/convolution_avx.c @@ -18,6 +18,7 @@ #include #include "alignment.h" +#include "vif_options.h" #include "convolution.h" #include "convolution_internal.h" @@ -28,6 +29,21 @@ FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_5(const float * RES FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); +#if VIF_OPT_ENABLE +FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); 
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); + +FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end); +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end); +#endif FORCE_INLINE inline static void convolution_f32_avx_s_3x3_2d_scanline(const float * RESTRICT filter, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end) { __m256 f00, f01, f02, f10, f11, f12, f20, f21, f22; @@ -856,3 
+872,1779 @@ void convolution_f32_avx_s(const float *filter, int filter_width, const float *s
         break;
     }
 }
+
+#if VIF_OPT_ENABLE
+// Returns tap * v * v, where v is the eight floats at p (unaligned load).
+FORCE_INLINE inline static __m256 avx_sq_term_u(__m256 tap, const float *p)
+{
+    __m256 v = _mm256_loadu_ps(p);
+    return _mm256_mul_ps(tap, _mm256_mul_ps(v, v));
+}
+
+// Returns tap * v * v, where v is the eight floats at p. Uses an aligned
+// load, so p must be 32-byte aligned.
+FORCE_INLINE inline static __m256 avx_sq_term_a(__m256 tap, const float *p)
+{
+    __m256 v = _mm256_load_ps(p);
+    return _mm256_mul_ps(tap, _mm256_mul_ps(v, v));
+}
+
+// Filter a single scanline.
+//
+// Horizontal convolution of the squared samples of src. For every j in
+// [0, j_end), stepping by 8, it writes
+//     dst[j + radius + l] = sum_k filter[k] * src[j + k + l]^2,  l = 0..7,
+// with radius = filter_width / 2. N == 5, 9 or 17 selects the matching
+// unrolled kernel; any other N takes the generic path below, which consumes
+// the filter nine taps at a time and accumulates the groups through dst.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_h_sq_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+    switch (N) {
+    case 5:
+        convolution_f32_avx_s_1d_h_sq_scanline_5(filter, filter_width, src, dst, j_end);
+        return;
+    case 9:
+        convolution_f32_avx_s_1d_h_sq_scanline_9(filter, filter_width, src, dst, j_end);
+        return;
+    case 17:
+        convolution_f32_avx_s_1d_h_sq_scanline_17(filter, filter_width, src, dst, j_end);
+        return;
+    default:
+        break;
+    }
+
+    const int radius = filter_width / 2;
+
+    for (int x = 0; x < filter_width; x += 9) {
+        // Number of taps from x to the end of the filter; nine or more
+        // means this group is full.
+        const int taps_left = filter_width - x;
+
+        // Every tap register starts at zero (f4 included) so that none is
+        // ever left indeterminate for a short final group.
+        __m256 f0 = _mm256_setzero_ps();
+        __m256 f1 = _mm256_setzero_ps();
+        __m256 f2 = _mm256_setzero_ps();
+        __m256 f3 = _mm256_setzero_ps();
+        __m256 f4 = _mm256_setzero_ps();
+        __m256 f5 = _mm256_setzero_ps();
+        __m256 f6 = _mm256_setzero_ps();
+        __m256 f7 = _mm256_setzero_ps();
+        __m256 f8 = _mm256_setzero_ps();
+
+        switch (taps_left) {
+        default: f8 = _mm256_broadcast_ss(filter + x + 8); // fall through
+        case 8:  f7 = _mm256_broadcast_ss(filter + x + 7); // fall through
+        case 7:  f6 = _mm256_broadcast_ss(filter + x + 6); // fall through
+        case 6:  f5 = _mm256_broadcast_ss(filter + x + 5); // fall through
+        case 5:  f4 = _mm256_broadcast_ss(filter + x + 4); // fall through
+        case 4:  f3 = _mm256_broadcast_ss(filter + x + 3); // fall through
+        case 3:  f2 = _mm256_broadcast_ss(filter + x + 2); // fall through
+        case 2:  f1 = _mm256_broadcast_ss(filter + x + 1); // fall through
+        case 1:  f0 = _mm256_broadcast_ss(filter + x + 0);
+        }
+
+        for (int j = 0; j < j_end; j += 8) {
+            const float *in = src + j + x;
+            float *out = dst + j + radius;
+            __m256 sum0 = _mm256_setzero_ps();
+            __m256 sum1 = _mm256_setzero_ps();
+            __m256 sum2 = _mm256_setzero_ps();
+            __m256 sum3 = _mm256_setzero_ps();
+
+            // Four independent partial sums; the order of additions is
+            // part of the numerical result and must not be rearranged.
+            switch (taps_left) {
+            default: sum0 = avx_sq_term_u(f8, in + 8); // fall through
+            case 8:  sum3 = avx_sq_term_u(f7, in + 7); // fall through
+            case 7:  sum2 = avx_sq_term_u(f6, in + 6); // fall through
+            case 6:  sum1 = avx_sq_term_u(f5, in + 5); // fall through
+            case 5:  sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f4, in + 4)); // fall through
+            case 4:  sum3 = _mm256_add_ps(sum3, avx_sq_term_u(f3, in + 3)); // fall through
+            case 3:  sum2 = _mm256_add_ps(sum2, avx_sq_term_u(f2, in + 2)); // fall through
+            case 2:  sum1 = _mm256_add_ps(sum1, avx_sq_term_u(f1, in + 1)); // fall through
+            case 1:  sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f0, in + 0));
+            }
+
+            sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+            __m256 accum = _mm256_add_ps(_mm256_setzero_ps(), sum0);
+
+            // Groups after the first add onto what is already in dst.
+            if (x)
+                accum = _mm256_add_ps(accum, _mm256_loadu_ps(out));
+
+            _mm256_storeu_ps(out, accum);
+        }
+    }
+}
+
+// 17-tap horizontal kernel over squared samples (radius 8), done in two
+// passes over the scanline: taps 0-8 are stored to dst, then taps 9-16 are
+// added on top. dst + j + 8 is accessed with aligned loads/stores, so dst
+// must be 32-byte aligned. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+    __m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+
+    // Evaluate filter taps 0-8
+    f0 = _mm256_broadcast_ss(filter + 0);
+    f1 = _mm256_broadcast_ss(filter + 1);
+    f2 = _mm256_broadcast_ss(filter + 2);
+    f3 = _mm256_broadcast_ss(filter + 3);
+    f4 = _mm256_broadcast_ss(filter + 4);
+    f5 = _mm256_broadcast_ss(filter + 5);
+    f6 = _mm256_broadcast_ss(filter + 6);
+    f7 = _mm256_broadcast_ss(filter + 7);
+    f8 = _mm256_broadcast_ss(filter + 8);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_u(f0, in + 0);
+        __m256 sum1 = avx_sq_term_u(f1, in + 1);
+        __m256 sum2 = avx_sq_term_u(f2, in + 2);
+        __m256 sum3 = avx_sq_term_u(f3, in + 3);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f4, in + 4));
+        sum1 = _mm256_add_ps(sum1, avx_sq_term_u(f5, in + 5));
+        sum2 = _mm256_add_ps(sum2, avx_sq_term_u(f6, in + 6));
+        sum3 = _mm256_add_ps(sum3, avx_sq_term_u(f7, in + 7));
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f8, in + 8));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_store_ps(dst + j + 8, _mm256_add_ps(_mm256_setzero_ps(), sum0)); // radius = 8
+    }
+
+    // Evaluate filter taps 9-16
+    f0 = _mm256_broadcast_ss(filter + 9);
+    f1 = _mm256_broadcast_ss(filter + 10);
+    f2 = _mm256_broadcast_ss(filter + 11);
+    f3 = _mm256_broadcast_ss(filter + 12);
+    f4 = _mm256_broadcast_ss(filter + 13);
+    f5 = _mm256_broadcast_ss(filter + 14);
+    f6 = _mm256_broadcast_ss(filter + 15);
+    f7 = _mm256_broadcast_ss(filter + 16);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        float *out = dst + j + 8; // radius = 8
+        __m256 sum0 = avx_sq_term_u(f0, in + 9);
+        __m256 sum1 = avx_sq_term_u(f1, in + 10);
+        __m256 sum2 = avx_sq_term_u(f2, in + 11);
+        __m256 sum3 = avx_sq_term_u(f3, in + 12);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f4, in + 13));
+        sum1 = _mm256_add_ps(sum1, avx_sq_term_u(f5, in + 14));
+        sum2 = _mm256_add_ps(sum2, avx_sq_term_u(f6, in + 15));
+        sum3 = _mm256_add_ps(sum3, avx_sq_term_u(f7, in + 16));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        // Add the second half onto the first-pass result.
+        _mm256_store_ps(out, _mm256_add_ps(_mm256_load_ps(out), sum0));
+    }
+}
+
+// 9-tap horizontal kernel over squared samples (radius 4). Results go to
+// dst + j + 4 with unaligned stores. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+    const __m256 f0 = _mm256_broadcast_ss(filter + 0);
+    const __m256 f1 = _mm256_broadcast_ss(filter + 1);
+    const __m256 f2 = _mm256_broadcast_ss(filter + 2);
+    const __m256 f3 = _mm256_broadcast_ss(filter + 3);
+    const __m256 f4 = _mm256_broadcast_ss(filter + 4);
+    const __m256 f5 = _mm256_broadcast_ss(filter + 5);
+    const __m256 f6 = _mm256_broadcast_ss(filter + 6);
+    const __m256 f7 = _mm256_broadcast_ss(filter + 7);
+    const __m256 f8 = _mm256_broadcast_ss(filter + 8);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_u(f0, in + 0);
+        __m256 sum1 = avx_sq_term_u(f1, in + 1);
+        __m256 sum2 = avx_sq_term_u(f2, in + 2);
+        __m256 sum3 = avx_sq_term_u(f3, in + 3);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f4, in + 4));
+        sum1 = _mm256_add_ps(sum1, avx_sq_term_u(f5, in + 5));
+        sum2 = _mm256_add_ps(sum2, avx_sq_term_u(f6, in + 6));
+        sum3 = _mm256_add_ps(sum3, avx_sq_term_u(f7, in + 7));
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f8, in + 8));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_storeu_ps(dst + j + 4, _mm256_add_ps(_mm256_setzero_ps(), sum0)); // radius = 4
+    }
+}
+
+// 5-tap horizontal kernel over squared samples (radius 2). Results go to
+// dst + j + 2 with unaligned stores. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end)
+{
+    const __m256 f0 = _mm256_broadcast_ss(filter + 0);
+    const __m256 f1 = _mm256_broadcast_ss(filter + 1);
+    const __m256 f2 = _mm256_broadcast_ss(filter + 2);
+    const __m256 f3 = _mm256_broadcast_ss(filter + 3);
+    const __m256 f4 = _mm256_broadcast_ss(filter + 4);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_u(f0, in + 0);
+        __m256 sum1 = avx_sq_term_u(f1, in + 1);
+        __m256 sum2 = avx_sq_term_u(f2, in + 2);
+        __m256 sum3 = avx_sq_term_u(f3, in + 3);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_u(f4, in + 4));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_storeu_ps(dst + j + 2, _mm256_add_ps(_mm256_setzero_ps(), sum0)); // radius = 2
+    }
+}
+
+// Filter a single scanline.
+//
+// Vertical convolution of the squared samples of src, where src points at
+// the centre row. For every j in [0, j_end), stepping by 8, it writes
+//     dst[j + l] = sum_k filter[k] * src[(k - radius) * src_stride + j + l]^2.
+// All loads and stores are aligned, so the row addresses and dst + j must be
+// 32-byte aligned. N == 5, 9 or 17 selects the matching unrolled kernel; any
+// other N takes the generic path below (nine taps per group, accumulated
+// through dst).
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_v_sq_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+    switch (N) {
+    case 5:
+        convolution_f32_avx_s_1d_v_sq_scanline_5(filter, filter_width, src, dst, src_stride, j_end);
+        return;
+    case 9:
+        convolution_f32_avx_s_1d_v_sq_scanline_9(filter, filter_width, src, dst, src_stride, j_end);
+        return;
+    case 17:
+        convolution_f32_avx_s_1d_v_sq_scanline_17(filter, filter_width, src, dst, src_stride, j_end);
+        return;
+    default:
+        break;
+    }
+
+    const int radius = filter_width / 2;
+
+    // Move to the first row covered by the filter.
+    src -= radius * src_stride;
+
+    for (int y = 0; y < filter_width; y += 9) {
+        const int taps_left = filter_width - y;
+
+        // Every tap register starts at zero (f4 included) so that none is
+        // ever left indeterminate for a short final group.
+        __m256 f0 = _mm256_setzero_ps();
+        __m256 f1 = _mm256_setzero_ps();
+        __m256 f2 = _mm256_setzero_ps();
+        __m256 f3 = _mm256_setzero_ps();
+        __m256 f4 = _mm256_setzero_ps();
+        __m256 f5 = _mm256_setzero_ps();
+        __m256 f6 = _mm256_setzero_ps();
+        __m256 f7 = _mm256_setzero_ps();
+        __m256 f8 = _mm256_setzero_ps();
+
+        switch (taps_left) {
+        default: f8 = _mm256_broadcast_ss(filter + y + 8); // fall through
+        case 8:  f7 = _mm256_broadcast_ss(filter + y + 7); // fall through
+        case 7:  f6 = _mm256_broadcast_ss(filter + y + 6); // fall through
+        case 6:  f5 = _mm256_broadcast_ss(filter + y + 5); // fall through
+        case 5:  f4 = _mm256_broadcast_ss(filter + y + 4); // fall through
+        case 4:  f3 = _mm256_broadcast_ss(filter + y + 3); // fall through
+        case 3:  f2 = _mm256_broadcast_ss(filter + y + 2); // fall through
+        case 2:  f1 = _mm256_broadcast_ss(filter + y + 1); // fall through
+        case 1:  f0 = _mm256_broadcast_ss(filter + y + 0);
+        }
+
+        for (int j = 0; j < j_end; j += 8) {
+            const float *in = src + y * src_stride + j;
+            float *out = dst + j;
+            __m256 sum0 = _mm256_setzero_ps();
+            __m256 sum1 = _mm256_setzero_ps();
+            __m256 sum2 = _mm256_setzero_ps();
+            __m256 sum3 = _mm256_setzero_ps();
+
+            switch (taps_left) {
+            default: sum0 = avx_sq_term_a(f8, in + 8 * src_stride); // fall through
+            case 8:  sum3 = avx_sq_term_a(f7, in + 7 * src_stride); // fall through
+            case 7:  sum2 = avx_sq_term_a(f6, in + 6 * src_stride); // fall through
+            case 6:  sum1 = avx_sq_term_a(f5, in + 5 * src_stride); // fall through
+            case 5:  sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f4, in + 4 * src_stride)); // fall through
+            case 4:  sum3 = _mm256_add_ps(sum3, avx_sq_term_a(f3, in + 3 * src_stride)); // fall through
+            case 3:  sum2 = _mm256_add_ps(sum2, avx_sq_term_a(f2, in + 2 * src_stride)); // fall through
+            case 2:  sum1 = _mm256_add_ps(sum1, avx_sq_term_a(f1, in + 1 * src_stride)); // fall through
+            case 1:  sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f0, in + 0 * src_stride));
+            }
+
+            sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+            __m256 accum = _mm256_add_ps(_mm256_setzero_ps(), sum0);
+
+            // Groups after the first add onto what is already in dst.
+            if (y)
+                accum = _mm256_add_ps(accum, _mm256_load_ps(out));
+
+            _mm256_store_ps(out, accum);
+        }
+    }
+}
+
+// 17-tap vertical kernel over squared samples (radius 8), done in two passes:
+// taps 0-8 are stored to dst, then taps 9-16 are added on top. Aligned loads
+// and stores throughout. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+    __m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+    src -= 8 * src_stride; // radius = 8
+
+    // Evaluate filter taps 0-8
+    f0 = _mm256_broadcast_ss(filter + 0);
+    f1 = _mm256_broadcast_ss(filter + 1);
+    f2 = _mm256_broadcast_ss(filter + 2);
+    f3 = _mm256_broadcast_ss(filter + 3);
+    f4 = _mm256_broadcast_ss(filter + 4);
+    f5 = _mm256_broadcast_ss(filter + 5);
+    f6 = _mm256_broadcast_ss(filter + 6);
+    f7 = _mm256_broadcast_ss(filter + 7);
+    f8 = _mm256_broadcast_ss(filter + 8);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_a(f0, in + 0 * src_stride);
+        __m256 sum1 = avx_sq_term_a(f1, in + 1 * src_stride);
+        __m256 sum2 = avx_sq_term_a(f2, in + 2 * src_stride);
+        __m256 sum3 = avx_sq_term_a(f3, in + 3 * src_stride);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f4, in + 4 * src_stride));
+        sum1 = _mm256_add_ps(sum1, avx_sq_term_a(f5, in + 5 * src_stride));
+        sum2 = _mm256_add_ps(sum2, avx_sq_term_a(f6, in + 6 * src_stride));
+        sum3 = _mm256_add_ps(sum3, avx_sq_term_a(f7, in + 7 * src_stride));
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f8, in + 8 * src_stride));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_store_ps(dst + j, sum0);
+    }
+
+    // Evaluate filter taps 9-16
+    f0 = _mm256_broadcast_ss(filter + 9);
+    f1 = _mm256_broadcast_ss(filter + 10);
+    f2 = _mm256_broadcast_ss(filter + 11);
+    f3 = _mm256_broadcast_ss(filter + 12);
+    f4 = _mm256_broadcast_ss(filter + 13);
+    f5 = _mm256_broadcast_ss(filter + 14);
+    f6 = _mm256_broadcast_ss(filter + 15);
+    f7 = _mm256_broadcast_ss(filter + 16);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_a(f0, in + 9 * src_stride);
+        __m256 sum1 = avx_sq_term_a(f1, in + 10 * src_stride);
+        __m256 sum2 = avx_sq_term_a(f2, in + 11 * src_stride);
+        __m256 sum3 = avx_sq_term_a(f3, in + 12 * src_stride);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f4, in + 13 * src_stride));
+        sum1 = _mm256_add_ps(sum1, avx_sq_term_a(f5, in + 14 * src_stride));
+        sum2 = _mm256_add_ps(sum2, avx_sq_term_a(f6, in + 15 * src_stride));
+        sum3 = _mm256_add_ps(sum3, avx_sq_term_a(f7, in + 16 * src_stride));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        // Add the second half onto the first-pass result.
+        _mm256_store_ps(dst + j, _mm256_add_ps(_mm256_load_ps(dst + j), sum0));
+    }
+}
+
+// 9-tap vertical kernel over squared samples (radius 4). Aligned loads and
+// stores. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+    src -= 4 * src_stride; // radius = 4
+
+    // Evaluate filter taps 0-8
+    const __m256 f0 = _mm256_broadcast_ss(filter + 0);
+    const __m256 f1 = _mm256_broadcast_ss(filter + 1);
+    const __m256 f2 = _mm256_broadcast_ss(filter + 2);
+    const __m256 f3 = _mm256_broadcast_ss(filter + 3);
+    const __m256 f4 = _mm256_broadcast_ss(filter + 4);
+    const __m256 f5 = _mm256_broadcast_ss(filter + 5);
+    const __m256 f6 = _mm256_broadcast_ss(filter + 6);
+    const __m256 f7 = _mm256_broadcast_ss(filter + 7);
+    const __m256 f8 = _mm256_broadcast_ss(filter + 8);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_a(f0, in + 0 * src_stride);
+        __m256 sum1 = avx_sq_term_a(f1, in + 1 * src_stride);
+        __m256 sum2 = avx_sq_term_a(f2, in + 2 * src_stride);
+        __m256 sum3 = avx_sq_term_a(f3, in + 3 * src_stride);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f4, in + 4 * src_stride));
+        sum1 = _mm256_add_ps(sum1, avx_sq_term_a(f5, in + 5 * src_stride));
+        sum2 = _mm256_add_ps(sum2, avx_sq_term_a(f6, in + 6 * src_stride));
+        sum3 = _mm256_add_ps(sum3, avx_sq_term_a(f7, in + 7 * src_stride));
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f8, in + 8 * src_stride));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_store_ps(dst + j, sum0);
+    }
+}
+
+// 5-tap vertical kernel over squared samples (radius 2). Aligned loads and
+// stores. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end)
+{
+    src -= 2 * src_stride; // radius = 2
+
+    // Evaluate filter taps 0-4
+    const __m256 f0 = _mm256_broadcast_ss(filter + 0);
+    const __m256 f1 = _mm256_broadcast_ss(filter + 1);
+    const __m256 f2 = _mm256_broadcast_ss(filter + 2);
+    const __m256 f3 = _mm256_broadcast_ss(filter + 3);
+    const __m256 f4 = _mm256_broadcast_ss(filter + 4);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *in = src + j;
+        __m256 sum0 = avx_sq_term_a(f0, in + 0 * src_stride);
+        __m256 sum1 = avx_sq_term_a(f1, in + 1 * src_stride);
+        __m256 sum2 = avx_sq_term_a(f2, in + 2 * src_stride);
+        __m256 sum3 = avx_sq_term_a(f3, in + 3 * src_stride);
+
+        sum0 = _mm256_add_ps(sum0, avx_sq_term_a(f4, in + 4 * src_stride));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_store_ps(dst + j, sum0);
+    }
+}
+
+// Separable convolution of the squared input: a vertical pass squares and
+// filters src into tmp, then a horizontal pass filters tmp into dst. The
+// horizontal pass uses the plain (non-squaring) scanline and edge routines
+// because the samples in tmp are already filtered squares.
+//
+// N is forwarded to the scanline dispatchers to pick an unrolled kernel.
+// tmp is addressed with a row stride of vmaf_ceiln(width, 8) floats.
+// Rows and columns within radius of the border, plus the columns that do not
+// fill a whole vector, are computed by the scalar convolution_edge_* helpers.
+void convolution_f32_avx_s_1d_sq(
+    int N,
+    const float * RESTRICT filter,
+    int filter_width,
+    const float * RESTRICT src,
+    float * RESTRICT dst,
+    float * RESTRICT tmp,
+    int width,
+    int height,
+    int src_stride,
+    int dst_stride)
+{
+    const int radius = filter_width / 2;
+    const int width_mod8 = vmaf_floorn(width, 8);
+    const int tmp_stride = vmaf_ceiln(width, 8);
+
+    // Border extents, clamped to the image so that frames smaller than the
+    // filter radius never index past height/width.
+    const int top_rows = radius < height ? radius : height;
+    const int left_cols = radius < width ? radius : width;
+
+    // First row of the bottom border; never above the end of the top border.
+    int i_vec_end = height - radius;
+    if (i_vec_end < top_rows)
+        i_vec_end = top_rows;
+
+    // Number of columns handed to the vector horizontal kernel. It can be
+    // zero or negative for narrow images, in which case the kernel does
+    // nothing and the right-hand scalar loop must start at radius rather
+    // than at a negative column.
+    const int j_vec_end = width_mod8 - vmaf_ceiln(radius + 1, 8);
+    const int right_start = (j_vec_end > 0 ? j_vec_end : 0) + radius;
+
+    // Vertical pass.
+    for (int i = 0; i < height; ++i) {
+        int j = 0;
+
+        // Interior rows: vector kernel for the first width_mod8 columns,
+        // scalar code only for the remaining tail.
+        if (i >= top_rows && i < i_vec_end) {
+            convolution_f32_avx_s_1d_v_sq_scanline(N, filter, filter_width, src + i * src_stride, tmp + i * tmp_stride, src_stride, width_mod8);
+            j = width_mod8;
+        }
+
+        for (; j < width; ++j) {
+            tmp[i * tmp_stride + j] = convolution_edge_sq_s(false, filter, filter_width, src, width, height, src_stride, i, j);
+        }
+    }
+
+    // Horizontal pass.
+    for (int i = 0; i < height; ++i) {
+        for (int j = 0; j < left_cols; ++j) {
+            dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j);
+        }
+
+        convolution_f32_avx_s_1d_h_scanline(N, filter, filter_width, tmp + i * tmp_stride, dst + i * dst_stride, j_vec_end);
+
+        for (int j = right_start; j < width; ++j) {
+            dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j);
+        }
+    }
+}
+
+// Entry point: squared-input separable convolution. Each supported filter
+// width calls convolution_f32_avx_s_1d_sq with a literal N so the kernel
+// selection is a compile-time constant at the call site; unknown widths pass
+// N = 0, which selects the generic scanline path.
+void convolution_f32_avx_sq_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride)
+{
+    switch (filter_width) {
+    case 3:
+        convolution_f32_avx_s_1d_sq(3, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+        break;
+    case 5:
+        convolution_f32_avx_s_1d_sq(5, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+        break;
+    case 9:
+        convolution_f32_avx_s_1d_sq(9, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+        break;
+    case 17:
+        convolution_f32_avx_s_1d_sq(17, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+        break;
+    default:
+        convolution_f32_avx_s_1d_sq(0, filter, filter_width, src, dst, tmp, width, height, src_stride, dst_stride);
+        break;
+    }
+}
+
+// Filter a single scanline.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_h_xy_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+    // Horizontal convolution of the element-wise product src1 * src2. For
+    // every j in [0, j_end), stepping by 8, it writes
+    //     dst[j + radius + l] = sum_k filter[k] * src1[j + k + l] * src2[j + k + l]
+    // for l = 0..7, with radius = filter_width / 2. N == 5, 9 or 17 selects
+    // the matching unrolled kernel; any other N takes the generic path below,
+    // which consumes the filter nine taps at a time and accumulates the
+    // groups through dst.
+    switch (N) {
+    case 5:
+        convolution_f32_avx_s_1d_h_xy_scanline_5(filter, filter_width, src1, src2, dst, j_end);
+        return;
+    case 9:
+        convolution_f32_avx_s_1d_h_xy_scanline_9(filter, filter_width, src1, src2, dst, j_end);
+        return;
+    case 17:
+        convolution_f32_avx_s_1d_h_xy_scanline_17(filter, filter_width, src1, src2, dst, j_end);
+        return;
+    default:
+        break;
+    }
+
+    const int radius = filter_width / 2;
+
+    for (int x = 0; x < filter_width; x += 9) {
+        // Number of taps from x to the end of the filter; nine or more
+        // means this group is full.
+        const int taps_left = filter_width - x;
+
+        // Every tap register starts at zero (f4 included) so that none is
+        // ever left indeterminate for a short final group.
+        __m256 f0 = _mm256_setzero_ps();
+        __m256 f1 = _mm256_setzero_ps();
+        __m256 f2 = _mm256_setzero_ps();
+        __m256 f3 = _mm256_setzero_ps();
+        __m256 f4 = _mm256_setzero_ps();
+        __m256 f5 = _mm256_setzero_ps();
+        __m256 f6 = _mm256_setzero_ps();
+        __m256 f7 = _mm256_setzero_ps();
+        __m256 f8 = _mm256_setzero_ps();
+
+        switch (taps_left) {
+        default: f8 = _mm256_broadcast_ss(filter + x + 8); // fall through
+        case 8:  f7 = _mm256_broadcast_ss(filter + x + 7); // fall through
+        case 7:  f6 = _mm256_broadcast_ss(filter + x + 6); // fall through
+        case 6:  f5 = _mm256_broadcast_ss(filter + x + 5); // fall through
+        case 5:  f4 = _mm256_broadcast_ss(filter + x + 4); // fall through
+        case 4:  f3 = _mm256_broadcast_ss(filter + x + 3); // fall through
+        case 3:  f2 = _mm256_broadcast_ss(filter + x + 2); // fall through
+        case 2:  f1 = _mm256_broadcast_ss(filter + x + 1); // fall through
+        case 1:  f0 = _mm256_broadcast_ss(filter + x + 0);
+        }
+
+        for (int j = 0; j < j_end; j += 8) {
+            const float *a = src1 + j + x;
+            const float *b = src2 + j + x;
+            float *out = dst + j + radius;
+            __m256 p;
+            __m256 sum0 = _mm256_setzero_ps();
+            __m256 sum1 = _mm256_setzero_ps();
+            __m256 sum2 = _mm256_setzero_ps();
+            __m256 sum3 = _mm256_setzero_ps();
+
+            // Four independent partial sums; the order of additions is
+            // part of the numerical result and must not be rearranged.
+            switch (taps_left) {
+            default:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 8), _mm256_loadu_ps(b + 8));
+                sum0 = _mm256_mul_ps(f8, p);
+                // fall through
+            case 8:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 7), _mm256_loadu_ps(b + 7));
+                sum3 = _mm256_mul_ps(f7, p);
+                // fall through
+            case 7:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 6), _mm256_loadu_ps(b + 6));
+                sum2 = _mm256_mul_ps(f6, p);
+                // fall through
+            case 6:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 5), _mm256_loadu_ps(b + 5));
+                sum1 = _mm256_mul_ps(f5, p);
+                // fall through
+            case 5:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 4), _mm256_loadu_ps(b + 4));
+                sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f4, p));
+                // fall through
+            case 4:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 3), _mm256_loadu_ps(b + 3));
+                sum3 = _mm256_add_ps(sum3, _mm256_mul_ps(f3, p));
+                // fall through
+            case 3:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 2), _mm256_loadu_ps(b + 2));
+                sum2 = _mm256_add_ps(sum2, _mm256_mul_ps(f2, p));
+                // fall through
+            case 2:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 1), _mm256_loadu_ps(b + 1));
+                sum1 = _mm256_add_ps(sum1, _mm256_mul_ps(f1, p));
+                // fall through
+            case 1:
+                p = _mm256_mul_ps(_mm256_loadu_ps(a + 0), _mm256_loadu_ps(b + 0));
+                sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f0, p));
+            }
+
+            sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+            __m256 accum = _mm256_add_ps(_mm256_setzero_ps(), sum0);
+
+            // Groups after the first add onto what is already in dst.
+            if (x)
+                accum = _mm256_add_ps(accum, _mm256_loadu_ps(out));
+
+            _mm256_storeu_ps(out, accum);
+        }
+    }
+}
+
+// 17-tap horizontal kernel over src1 * src2 (radius 8), done in two passes
+// over the scanline: taps 0-8 are stored to dst, then taps 9-16 are added on
+// top. dst + j + 8 is accessed with aligned loads/stores, so dst must be
+// 32-byte aligned. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+    __m256 f0, f1, f2, f3, f4, f5, f6, f7, f8;
+
+    // Evaluate filter taps 0-8
+    f0 = _mm256_broadcast_ss(filter + 0);
+    f1 = _mm256_broadcast_ss(filter + 1);
+    f2 = _mm256_broadcast_ss(filter + 2);
+    f3 = _mm256_broadcast_ss(filter + 3);
+    f4 = _mm256_broadcast_ss(filter + 4);
+    f5 = _mm256_broadcast_ss(filter + 5);
+    f6 = _mm256_broadcast_ss(filter + 6);
+    f7 = _mm256_broadcast_ss(filter + 7);
+    f8 = _mm256_broadcast_ss(filter + 8);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *a = src1 + j;
+        const float *b = src2 + j;
+        __m256 p, sum0, sum1, sum2, sum3;
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 0), _mm256_loadu_ps(b + 0));
+        sum0 = _mm256_mul_ps(f0, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 1), _mm256_loadu_ps(b + 1));
+        sum1 = _mm256_mul_ps(f1, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 2), _mm256_loadu_ps(b + 2));
+        sum2 = _mm256_mul_ps(f2, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 3), _mm256_loadu_ps(b + 3));
+        sum3 = _mm256_mul_ps(f3, p);
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 4), _mm256_loadu_ps(b + 4));
+        sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f4, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 5), _mm256_loadu_ps(b + 5));
+        sum1 = _mm256_add_ps(sum1, _mm256_mul_ps(f5, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 6), _mm256_loadu_ps(b + 6));
+        sum2 = _mm256_add_ps(sum2, _mm256_mul_ps(f6, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 7), _mm256_loadu_ps(b + 7));
+        sum3 = _mm256_add_ps(sum3, _mm256_mul_ps(f7, p));
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 8), _mm256_loadu_ps(b + 8));
+        sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f8, p));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_store_ps(dst + j + 8, _mm256_add_ps(_mm256_setzero_ps(), sum0)); // radius = 8
+    }
+
+    // Evaluate filter taps 9-16
+    f0 = _mm256_broadcast_ss(filter + 9);
+    f1 = _mm256_broadcast_ss(filter + 10);
+    f2 = _mm256_broadcast_ss(filter + 11);
+    f3 = _mm256_broadcast_ss(filter + 12);
+    f4 = _mm256_broadcast_ss(filter + 13);
+    f5 = _mm256_broadcast_ss(filter + 14);
+    f6 = _mm256_broadcast_ss(filter + 15);
+    f7 = _mm256_broadcast_ss(filter + 16);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *a = src1 + j;
+        const float *b = src2 + j;
+        float *out = dst + j + 8; // radius = 8
+        __m256 p, sum0, sum1, sum2, sum3;
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 9), _mm256_loadu_ps(b + 9));
+        sum0 = _mm256_mul_ps(f0, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 10), _mm256_loadu_ps(b + 10));
+        sum1 = _mm256_mul_ps(f1, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 11), _mm256_loadu_ps(b + 11));
+        sum2 = _mm256_mul_ps(f2, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 12), _mm256_loadu_ps(b + 12));
+        sum3 = _mm256_mul_ps(f3, p);
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 13), _mm256_loadu_ps(b + 13));
+        sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f4, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 14), _mm256_loadu_ps(b + 14));
+        sum1 = _mm256_add_ps(sum1, _mm256_mul_ps(f5, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 15), _mm256_loadu_ps(b + 15));
+        sum2 = _mm256_add_ps(sum2, _mm256_mul_ps(f6, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 16), _mm256_loadu_ps(b + 16));
+        sum3 = _mm256_add_ps(sum3, _mm256_mul_ps(f7, p));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        // Add the second half onto the first-pass result.
+        _mm256_store_ps(out, _mm256_add_ps(_mm256_load_ps(out), sum0));
+    }
+}
+
+// 9-tap horizontal kernel over src1 * src2 (radius 4). Results go to
+// dst + j + 4 with unaligned stores. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+    const __m256 f0 = _mm256_broadcast_ss(filter + 0);
+    const __m256 f1 = _mm256_broadcast_ss(filter + 1);
+    const __m256 f2 = _mm256_broadcast_ss(filter + 2);
+    const __m256 f3 = _mm256_broadcast_ss(filter + 3);
+    const __m256 f4 = _mm256_broadcast_ss(filter + 4);
+    const __m256 f5 = _mm256_broadcast_ss(filter + 5);
+    const __m256 f6 = _mm256_broadcast_ss(filter + 6);
+    const __m256 f7 = _mm256_broadcast_ss(filter + 7);
+    const __m256 f8 = _mm256_broadcast_ss(filter + 8);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *a = src1 + j;
+        const float *b = src2 + j;
+        __m256 p, sum0, sum1, sum2, sum3;
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 0), _mm256_loadu_ps(b + 0));
+        sum0 = _mm256_mul_ps(f0, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 1), _mm256_loadu_ps(b + 1));
+        sum1 = _mm256_mul_ps(f1, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 2), _mm256_loadu_ps(b + 2));
+        sum2 = _mm256_mul_ps(f2, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 3), _mm256_loadu_ps(b + 3));
+        sum3 = _mm256_mul_ps(f3, p);
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 4), _mm256_loadu_ps(b + 4));
+        sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f4, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 5), _mm256_loadu_ps(b + 5));
+        sum1 = _mm256_add_ps(sum1, _mm256_mul_ps(f5, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 6), _mm256_loadu_ps(b + 6));
+        sum2 = _mm256_add_ps(sum2, _mm256_mul_ps(f6, p));
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 7), _mm256_loadu_ps(b + 7));
+        sum3 = _mm256_add_ps(sum3, _mm256_mul_ps(f7, p));
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 8), _mm256_loadu_ps(b + 8));
+        sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f8, p));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_storeu_ps(dst + j + 4, _mm256_add_ps(_mm256_setzero_ps(), sum0)); // radius = 4
+    }
+}
+
+// 5-tap horizontal kernel over src1 * src2 (radius 2). Results go to
+// dst + j + 2 with unaligned stores. filter_width is not used.
+FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int j_end)
+{
+    const __m256 f0 = _mm256_broadcast_ss(filter + 0);
+    const __m256 f1 = _mm256_broadcast_ss(filter + 1);
+    const __m256 f2 = _mm256_broadcast_ss(filter + 2);
+    const __m256 f3 = _mm256_broadcast_ss(filter + 3);
+    const __m256 f4 = _mm256_broadcast_ss(filter + 4);
+
+    for (int j = 0; j < j_end; j += 8) {
+        const float *a = src1 + j;
+        const float *b = src2 + j;
+        __m256 p, sum0, sum1, sum2, sum3;
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 0), _mm256_loadu_ps(b + 0));
+        sum0 = _mm256_mul_ps(f0, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 1), _mm256_loadu_ps(b + 1));
+        sum1 = _mm256_mul_ps(f1, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 2), _mm256_loadu_ps(b + 2));
+        sum2 = _mm256_mul_ps(f2, p);
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 3), _mm256_loadu_ps(b + 3));
+        sum3 = _mm256_mul_ps(f3, p);
+
+        p = _mm256_mul_ps(_mm256_loadu_ps(a + 4), _mm256_loadu_ps(b + 4));
+        sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f4, p));
+
+        sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+        _mm256_storeu_ps(dst + j + 2, _mm256_add_ps(_mm256_setzero_ps(), sum0)); // radius = 2
+    }
+}
+
+// Filter a single scanline.
+FORCE_INLINE inline static void convolution_f32_avx_s_1d_v_xy_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end)
+{
+    // Vertical convolution of the element-wise product src1 * src2, where
+    // src1 and src2 point at the centre row of their respective planes. For
+    // every j in [0, j_end), stepping by 8, it writes
+    //     dst[j + l] = sum_k filter[k] * src1[(k - radius) * src1_stride + j + l]
+    //                                  * src2[(k - radius) * src2_stride + j + l]
+    // for l = 0..7, with radius = filter_width / 2. All loads and stores are
+    // aligned, so the row addresses and dst + j must be 32-byte aligned.
+    // N == 5, 9 or 17 selects the matching unrolled kernel; any other N takes
+    // the generic path below (nine taps per group, accumulated through dst).
+    switch (N) {
+    case 5:
+        convolution_f32_avx_s_1d_v_xy_scanline_5(filter, filter_width, src1, src2, dst, src1_stride, src2_stride, j_end);
+        return;
+    case 9:
+        convolution_f32_avx_s_1d_v_xy_scanline_9(filter, filter_width, src1, src2, dst, src1_stride, src2_stride, j_end);
+        return;
+    case 17:
+        convolution_f32_avx_s_1d_v_xy_scanline_17(filter, filter_width, src1, src2, dst, src1_stride, src2_stride, j_end);
+        return;
+    default:
+        break;
+    }
+
+    const int radius = filter_width / 2;
+
+    // Move both inputs to the first row covered by the filter.
+    src1 -= radius * src1_stride;
+    src2 -= radius * src2_stride;
+
+    for (int y = 0; y < filter_width; y += 9) {
+        // Number of taps from y to the end of the filter; nine or more
+        // means this group is full.
+        const int taps_left = filter_width - y;
+
+        // Every tap register starts at zero (f4 included) so that none is
+        // ever left indeterminate for a short final group.
+        __m256 f0 = _mm256_setzero_ps();
+        __m256 f1 = _mm256_setzero_ps();
+        __m256 f2 = _mm256_setzero_ps();
+        __m256 f3 = _mm256_setzero_ps();
+        __m256 f4 = _mm256_setzero_ps();
+        __m256 f5 = _mm256_setzero_ps();
+        __m256 f6 = _mm256_setzero_ps();
+        __m256 f7 = _mm256_setzero_ps();
+        __m256 f8 = _mm256_setzero_ps();
+
+        switch (taps_left) {
+        default: f8 = _mm256_broadcast_ss(filter + y + 8); // fall through
+        case 8:  f7 = _mm256_broadcast_ss(filter + y + 7); // fall through
+        case 7:  f6 = _mm256_broadcast_ss(filter + y + 6); // fall through
+        case 6:  f5 = _mm256_broadcast_ss(filter + y + 5); // fall through
+        case 5:  f4 = _mm256_broadcast_ss(filter + y + 4); // fall through
+        case 4:  f3 = _mm256_broadcast_ss(filter + y + 3); // fall through
+        case 3:  f2 = _mm256_broadcast_ss(filter + y + 2); // fall through
+        case 2:  f1 = _mm256_broadcast_ss(filter + y + 1); // fall through
+        case 1:  f0 = _mm256_broadcast_ss(filter + y + 0);
+        }
+
+        for (int j = 0; j < j_end; j += 8) {
+            // Column j of the first row in this tap group, in each input.
+            const float *a = src1 + y * src1_stride + j;
+            const float *b = src2 + y * src2_stride + j;
+            float *out = dst + j;
+            __m256 p;
+            __m256 sum0 = _mm256_setzero_ps();
+            __m256 sum1 = _mm256_setzero_ps();
+            __m256 sum2 = _mm256_setzero_ps();
+            __m256 sum3 = _mm256_setzero_ps();
+
+            // Four independent partial sums; the order of additions is
+            // part of the numerical result and must not be rearranged.
+            switch (taps_left) {
+            default:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 8 * src1_stride), _mm256_load_ps(b + 8 * src2_stride));
+                sum0 = _mm256_mul_ps(f8, p);
+                // fall through
+            case 8:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 7 * src1_stride), _mm256_load_ps(b + 7 * src2_stride));
+                sum3 = _mm256_mul_ps(f7, p);
+                // fall through
+            case 7:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 6 * src1_stride), _mm256_load_ps(b + 6 * src2_stride));
+                sum2 = _mm256_mul_ps(f6, p);
+                // fall through
+            case 6:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 5 * src1_stride), _mm256_load_ps(b + 5 * src2_stride));
+                sum1 = _mm256_mul_ps(f5, p);
+                // fall through
+            case 5:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 4 * src1_stride), _mm256_load_ps(b + 4 * src2_stride));
+                sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f4, p));
+                // fall through
+            case 4:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 3 * src1_stride), _mm256_load_ps(b + 3 * src2_stride));
+                sum3 = _mm256_add_ps(sum3, _mm256_mul_ps(f3, p));
+                // fall through
+            case 3:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 2 * src1_stride), _mm256_load_ps(b + 2 * src2_stride));
+                sum2 = _mm256_add_ps(sum2, _mm256_mul_ps(f2, p));
+                // fall through
+            case 2:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 1 * src1_stride), _mm256_load_ps(b + 1 * src2_stride));
+                sum1 = _mm256_add_ps(sum1, _mm256_mul_ps(f1, p));
+                // fall through
+            case 1:
+                p = _mm256_mul_ps(_mm256_load_ps(a + 0 * src1_stride), _mm256_load_ps(b + 0 * src2_stride));
+                sum0 = _mm256_add_ps(sum0, _mm256_mul_ps(f0, p));
+            }
+
+            sum0 = _mm256_add_ps(_mm256_add_ps(sum0, sum2), _mm256_add_ps(sum1, sum3));
+
+            __m256 accum = _mm256_add_ps(_mm256_setzero_ps(), sum0);
+
+            // Groups after the first add onto what is already in dst.
+            if (y)
+                accum = _mm256_add_ps(accum, _mm256_load_ps(out));
+
+            _mm256_store_ps(out, accum);
+        }
+    }
+}
+
+FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end) +{ + __m256 f0, f1, f2, f3, f4, f5, f6, f7, f8; + src1 -= 8 * src1_stride; // radius = 8 + src2 -= 8 * src2_stride; // radius = 8 + + // Evaluate filter taps 0-8 + f0 = _mm256_broadcast_ss(filter + 0); + f1 = _mm256_broadcast_ss(filter + 1); + f2 = _mm256_broadcast_ss(filter + 2); + f3 = _mm256_broadcast_ss(filter + 3); + f4 = _mm256_broadcast_ss(filter + 4); + f5 = _mm256_broadcast_ss(filter + 5); + f6 = _mm256_broadcast_ss(filter + 6); + f7 = _mm256_broadcast_ss(filter + 7); + f8 = _mm256_broadcast_ss(filter + 8); + + for (int j = 0; j < j_end; j += 8) { + __m256 sum0, sum1, sum2, sum3; + __m256 g, g2; + + g = _mm256_load_ps(src1 + 0 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 0 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f0, g); + sum0 = g; + + g = _mm256_load_ps(src1 + 1 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 1 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f1, g); + sum1 = g; + + g = _mm256_load_ps(src1 + 2 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 2 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f2, g); + sum2 = g; + + g = _mm256_load_ps(src1 + 3 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 3 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f3, g); + sum3 = g; + + g = _mm256_load_ps(src1 + 4 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 4 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f4, g); + sum0 = _mm256_add_ps(sum0, g); + + g = _mm256_load_ps(src1 + 5 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 5 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f5, g); + sum1 = _mm256_add_ps(sum1, g); + + g = _mm256_load_ps(src1 + 6 * src1_stride + j); + g2 = 
_mm256_load_ps(src2 + 6 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f6, g); + sum2 = _mm256_add_ps(sum2, g); + + g = _mm256_load_ps(src1 + 7 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 7 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f7, g); + sum3 = _mm256_add_ps(sum3, g); + + g = _mm256_load_ps(src1 + 8 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 8 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f8, g); + sum0 = _mm256_add_ps(sum0, g); + + sum0 = _mm256_add_ps(sum0, sum2); + sum1 = _mm256_add_ps(sum1, sum3); + + sum0 = _mm256_add_ps(sum0, sum1); + + _mm256_store_ps(dst + j, sum0); + } + + // Evaluate filter taps 9-16 + f0 = _mm256_broadcast_ss(filter + 9); + f1 = _mm256_broadcast_ss(filter + 10); + f2 = _mm256_broadcast_ss(filter + 11); + f3 = _mm256_broadcast_ss(filter + 12); + f4 = _mm256_broadcast_ss(filter + 13); + f5 = _mm256_broadcast_ss(filter + 14); + f6 = _mm256_broadcast_ss(filter + 15); + f7 = _mm256_broadcast_ss(filter + 16); + + for (int j = 0; j < j_end; j += 8) { + __m256 sum0, sum1, sum2, sum3; + __m256 g, g2; + + g = _mm256_load_ps(src1 + 9 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 9 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f0, g); + sum0 = g; + + g = _mm256_load_ps(src1 + 10 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 10 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f1, g); + sum1 = g; + + g = _mm256_load_ps(src1 + 11 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 11 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f2, g); + sum2 = g; + + g = _mm256_load_ps(src1 + 12 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 12 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f3, g); + sum3 = g; + + g = _mm256_load_ps(src1 + 13 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 13 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f4, g); + sum0 = _mm256_add_ps(sum0, 
g); + + g = _mm256_load_ps(src1 + 14 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 14 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f5, g); + sum1 = _mm256_add_ps(sum1, g); + + g = _mm256_load_ps(src1 + 15 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 15 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f6, g); + sum2 = _mm256_add_ps(sum2, g); + + g = _mm256_load_ps(src1 + 16 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 16 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f7, g); + sum3 = _mm256_add_ps(sum3, g); + + sum0 = _mm256_add_ps(sum0, sum2); + sum1 = _mm256_add_ps(sum1, sum3); + + sum0 = _mm256_add_ps(sum0, sum1); + + sum0 = _mm256_add_ps(_mm256_load_ps(dst + j), sum0); + _mm256_store_ps(dst + j, sum0); + } +} + +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end) +{ + __m256 f0, f1, f2, f3, f4, f5, f6, f7, f8; + src1 -= 4 * src1_stride; // radius = 4 + src2 -= 4 * src2_stride; // radius = 4 + + // Evaluate filter taps 0-8 + f0 = _mm256_broadcast_ss(filter + 0); + f1 = _mm256_broadcast_ss(filter + 1); + f2 = _mm256_broadcast_ss(filter + 2); + f3 = _mm256_broadcast_ss(filter + 3); + f4 = _mm256_broadcast_ss(filter + 4); + f5 = _mm256_broadcast_ss(filter + 5); + f6 = _mm256_broadcast_ss(filter + 6); + f7 = _mm256_broadcast_ss(filter + 7); + f8 = _mm256_broadcast_ss(filter + 8); + + for (int j = 0; j < j_end; j += 8) { + __m256 sum0, sum1, sum2, sum3; + __m256 g, g2; + + g = _mm256_load_ps(src1 + 0 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 0 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f0, g); + sum0 = g; + + g = _mm256_load_ps(src1 + 1 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 1 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f1, g); + sum1 = g; + + g 
= _mm256_load_ps(src1 + 2 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 2 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f2, g); + sum2 = g; + + g = _mm256_load_ps(src1 + 3 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 3 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f3, g); + sum3 = g; + + g = _mm256_load_ps(src1 + 4 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 4 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f4, g); + sum0 = _mm256_add_ps(sum0, g); + + g = _mm256_load_ps(src1 + 5 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 5 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f5, g); + sum1 = _mm256_add_ps(sum1, g); + + g = _mm256_load_ps(src1 + 6 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 6 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f6, g); + sum2 = _mm256_add_ps(sum2, g); + + g = _mm256_load_ps(src1 + 7 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 7 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f7, g); + sum3 = _mm256_add_ps(sum3, g); + + g = _mm256_load_ps(src1 + 8 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 8 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f8, g); + sum0 = _mm256_add_ps(sum0, g); + + sum0 = _mm256_add_ps(sum0, sum2); + sum1 = _mm256_add_ps(sum1, sum3); + + sum0 = _mm256_add_ps(sum0, sum1); + + _mm256_store_ps(dst + j, sum0); + } +} + +FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end) +{ + __m256 f0, f1, f2, f3, f4; + src1 -= 2 * src1_stride; // radius = 2 + src2 -= 2 * src2_stride; // radius = 2 + + // Evaluate filter taps 0-5 + f0 = _mm256_broadcast_ss(filter + 0); + f1 = _mm256_broadcast_ss(filter + 1); + f2 = _mm256_broadcast_ss(filter + 2); + f3 = _mm256_broadcast_ss(filter + 3); 
+ f4 = _mm256_broadcast_ss(filter + 4); + + for (int j = 0; j < j_end; j += 8) { + __m256 sum0, sum1, sum2, sum3; + __m256 g, g2; + + g = _mm256_load_ps(src1 + 0 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 0 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f0, g); + sum0 = g; + + g = _mm256_load_ps(src1 + 1 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 1 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f1, g); + sum1 = g; + + g = _mm256_load_ps(src1 + 2 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 2 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f2, g); + sum2 = g; + + g = _mm256_load_ps(src1 + 3 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 3 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f3, g); + sum3 = g; + + g = _mm256_load_ps(src1 + 4 * src1_stride + j); + g2 = _mm256_load_ps(src2 + 4 * src2_stride + j); + g = _mm256_mul_ps(g, g2); + g = _mm256_mul_ps(f4, g); + sum0 = _mm256_add_ps(sum0, g); + + sum0 = _mm256_add_ps(sum0, sum2); + sum1 = _mm256_add_ps(sum1, sum3); + + sum0 = _mm256_add_ps(sum0, sum1); + + _mm256_store_ps(dst + j, sum0); + } +} + +void convolution_f32_avx_s_1d_xy( + int N, + const float * RESTRICT filter, + int filter_width, + const float * RESTRICT src1, + const float * RESTRICT src2, + float * RESTRICT dst, + float * RESTRICT tmp, + int width, + int height, + int src1_stride, + int src2_stride, + int dst_stride) +{ + int radius = filter_width / 2; + int width_mod8 = vmaf_floorn(width, 8); + int tmp_stride = vmaf_ceiln(width, 8); + + int i_vec_end = height - radius; + int j_vec_end = width_mod8 - vmaf_ceiln(radius + 1, 8); + + // Vertical pass. 
+ for (int i = 0; i < radius; ++i) { + for (int j = 0; j < width; ++j) { + tmp[i * tmp_stride + j] = convolution_edge_xy_s(false, filter, filter_width, src1, src2, width, height, src1_stride, src2_stride, i, j); + } + } + for (int i = radius; i < i_vec_end; ++i) { + convolution_f32_avx_s_1d_v_xy_scanline(N, filter, filter_width, src1 + i * src1_stride, src2 + i * src2_stride, tmp + i * tmp_stride, src1_stride, src2_stride, width_mod8); + + for (int j = width_mod8; j < width; ++j) { + tmp[i * tmp_stride + j] = convolution_edge_xy_s(false, filter, filter_width, src1, src2, width, height, src1_stride, src2_stride, i, j); + } + } + for (int i = i_vec_end; i < height; ++i) { + for (int j = 0; j < width; ++j) { + tmp[i * tmp_stride + j] = convolution_edge_xy_s(false, filter, filter_width, src1, src2, width, height, src1_stride, src2_stride, i, j); + } + } + + // Horizontal pass. + for (int i = 0; i < height; ++i) { + for (int j = 0; j < radius; ++j) { + dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j); + } + + convolution_f32_avx_s_1d_h_scanline(N, filter, filter_width, tmp + i * tmp_stride, dst + i * dst_stride, j_vec_end); + + for (int j = j_vec_end + radius; j < width; ++j) { + dst[i * dst_stride + j] = convolution_edge_s(true, filter, filter_width, tmp, width, height, tmp_stride, i, j); + } + } +} + +void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float *src1, const float *src2, float *dst, float *tmp, int width, int height, int src1_stride, int src2_stride, int dst_stride) +{ + switch (filter_width) { + case 17: + convolution_f32_avx_s_1d_xy(17, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride); + break; + case 9: + convolution_f32_avx_s_1d_xy(9, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride); + break; + case 5: + convolution_f32_avx_s_1d_xy(5, filter, filter_width, src1, src2, dst, 
tmp, width, height, src1_stride, src2_stride, dst_stride); + break; + case 3: + convolution_f32_avx_s_1d_xy(3, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride); + break; + default: + convolution_f32_avx_s_1d_xy(0, filter, filter_width, src1, src2, dst, tmp, width, height, src1_stride, src2_stride, dst_stride); + break; + } +} +#endif diff --git a/feature/src/common/convolution_internal.h b/feature/src/common/convolution_internal.h index d41a2f9ff..1bf5a3863 100644 --- a/feature/src/common/convolution_internal.h +++ b/feature/src/common/convolution_internal.h @@ -51,4 +51,64 @@ FORCE_INLINE inline float convolution_edge_s(bool horizontal, const float *filte return accum; } +#if VIF_OPT_ENABLE +FORCE_INLINE inline float convolution_edge_sq_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j) +{ + int radius = filter_width / 2; + + float accum = 0; + float src_val; + for (int k = 0; k < filter_width; ++k) { + int i_tap = horizontal ? i : i - radius + k; + int j_tap = horizontal ? j - radius + k : j; + + // Handle edges by mirroring. + if (horizontal) { + if (j_tap < 0) + j_tap = -j_tap; + else if (j_tap >= width) + j_tap = width - (j_tap - width + 1); + } + else { + if (i_tap < 0) + i_tap = -i_tap; + else if (i_tap >= height) + i_tap = height - (i_tap - height + 1); + } + src_val = src[i_tap * stride + j_tap]; + accum += filter[k] * (src_val * src_val); + } + return accum; +} + +FORCE_INLINE inline float convolution_edge_xy_s(bool horizontal, const float *filter, int filter_width, const float *src1, const float *src2, int width, int height, int stride1, int stride2, int i, int j) +{ + int radius = filter_width / 2; + + float accum = 0; + float src_val1, src_val2; + for (int k = 0; k < filter_width; ++k) { + int i_tap = horizontal ? i : i - radius + k; + int j_tap = horizontal ? j - radius + k : j; + + // Handle edges by mirroring. 
+ if (horizontal) { + if (j_tap < 0) + j_tap = -j_tap; + else if (j_tap >= width) + j_tap = width - (j_tap - width + 1); + } + else { + if (i_tap < 0) + i_tap = -i_tap; + else if (i_tap >= height) + i_tap = height - (i_tap - height + 1); + } + src_val1 = src1[i_tap * stride1 + j_tap]; + src_val2 = src2[i_tap * stride2 + j_tap]; + accum += filter[k] * (src_val1 * src_val2); + } + return accum; +} +#endif #endif // CONVOLUTION_INTERNAL_H_ diff --git a/feature/src/vif.c b/feature/src/vif.c index 165677e75..b3f13169b 100644 --- a/feature/src/vif.c +++ b/feature/src/vif.c @@ -26,6 +26,8 @@ #include "common/alloc.h" #include "common/file_io.h" #include "vif_options.h" +#include "convolution.h" +#include "convolution_internal.h" #include "vif_tools.h" #define read_image_b read_image_b2s @@ -40,6 +42,10 @@ #define vif_statistic vif_statistic_s #define offset_image offset_image_s +#if VIF_OPT_ENABLE +#define vif_filter1d_sq vif_filter1d_sq_s +#define vif_filter1d_xy vif_filter1d_xy_s +#endif int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores) { float *data_buf = 0; @@ -53,15 +59,22 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride float *mu1; float *mu2; - float *mu1_sq; - float *mu2_sq; - float *mu1_mu2; float *ref_sq_filt; float *dis_sq_filt; float *ref_dis_filt; + float *tmpbuf; + + +#if VIF_OPT_ENABLE float *num_array; float *den_array; - float *tmpbuf; +#else + float *mu1_sq; + float *mu2_sq; + float *mu1_mu2; + float *num_array; + float *den_array; +#endif /* Offset pointers to adjust for convolution border handling. 
*/ float *mu1_adj = 0; @@ -88,11 +101,42 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride int buf_stride = ALIGN_CEIL(w * sizeof(float)); size_t buf_sz_one = (size_t)buf_stride * h; - double num = 0; - double den = 0; + float num = 0; + float den = 0; int scale; int ret = 1; +#if VIF_OPT_ENABLE + // Code optimized to save on multiple buffer copies + // hence the reduction in the number of buffers required from 15 to 10 +#define VIF_BUF_CNT 10 + if (SIZE_MAX / buf_sz_one < VIF_BUF_CNT) + { + printf("error: SIZE_MAX / buf_sz_one < VIF_BUF_CNT, buf_sz_one = %zu.\n", buf_sz_one); + fflush(stdout); + goto fail_or_end; + } + + if (!(data_buf = aligned_malloc(buf_sz_one * VIF_BUF_CNT, MAX_ALIGN))) + { + printf("error: aligned_malloc failed for data_buf.\n"); + fflush(stdout); + goto fail_or_end; + } + + data_top = (char *)data_buf; + + ref_scale = (float *)data_top; data_top += buf_sz_one; + dis_scale = (float *)data_top; data_top += buf_sz_one; + mu1 = (float *)data_top; data_top += buf_sz_one; + mu2 = (float *)data_top; data_top += buf_sz_one; + ref_sq_filt = (float *)data_top; data_top += buf_sz_one; + dis_sq_filt = (float *)data_top; data_top += buf_sz_one; + ref_dis_filt = (float *)data_top; data_top += buf_sz_one; + num_array = (float *)data_top; data_top += buf_sz_one; + den_array = (float *)data_top; data_top += buf_sz_one; + tmpbuf = (float *)data_top; data_top += buf_sz_one; +#else if (SIZE_MAX / buf_sz_one < 15) { @@ -126,6 +170,7 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride num_array = (float *)data_top; data_top += buf_sz_one; den_array = (float *)data_top; data_top += buf_sz_one; tmpbuf = (float *)data_top; data_top += buf_sz_one; +#endif for (scale = 0; scale < 4; ++scale) { @@ -192,36 +237,49 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride vif_filter2d(filter, curr_ref_scale, mu1, w, h, curr_ref_stride, buf_stride, filter_width); 
vif_filter2d(filter, curr_dis_scale, mu2, w, h, curr_dis_stride, buf_stride, filter_width); #endif +#if !VIF_OPT_ENABLE vif_xx_yy_xy(mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride); vif_xx_yy_xy(curr_ref_scale, curr_dis_scale, ref_sq, dis_sq, ref_dis, w, h, curr_ref_stride, curr_dis_stride, buf_stride, buf_stride, buf_stride); +#endif #ifdef VIF_OPT_FILTER_1D +#if VIF_OPT_ENABLE + + // Code optimized by adding intrinsic code for the functions, + // vif_filter1d_sq and vif_filter1d_sq + vif_filter1d_sq(filter, curr_ref_scale, ref_sq_filt, tmpbuf, w, h, curr_ref_stride, buf_stride, filter_width); + vif_filter1d_sq(filter, curr_dis_scale, dis_sq_filt, tmpbuf, w, h, curr_dis_stride, buf_stride, filter_width); + vif_filter1d_xy(filter, curr_ref_scale, curr_dis_scale, ref_dis_filt, tmpbuf, w, h, curr_ref_stride, curr_dis_stride, buf_stride, filter_width); +#else vif_filter1d(filter, ref_sq, ref_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width); vif_filter1d(filter, dis_sq, dis_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width); vif_filter1d(filter, ref_dis, ref_dis_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width); +#endif #else vif_filter2d(filter, ref_sq, ref_sq_filt, w, h, buf_stride, buf_stride, filter_width); vif_filter2d(filter, dis_sq, dis_sq_filt, w, h, buf_stride, buf_stride, filter_width); vif_filter2d(filter, ref_dis, ref_dis_filt, w, h, buf_stride, buf_stride, filter_width); #endif +#if VIF_OPT_ENABLE + vif_statistic(mu1, mu2, NULL, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array, + w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride); +#else vif_statistic(mu1_sq, mu2_sq, mu1_mu2, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array, w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride); - +#endif mu1_adj = ADJUST(mu1); mu2_adj = ADJUST(mu2); #ifdef 
VIF_OPT_DEBUG_DUMP - mu1_sq_adj = ADJUST(mu1_sq); - mu2_sq_adj = ADJUST(mu2_sq); - mu1_mu2_adj = ADJUST(mu1_mu2); - ref_sq_filt_adj = ADJUST(ref_sq_filt); dis_sq_filt_adj = ADJUST(dis_sq_filt); ref_dis_filt_adj = ADJUST(ref_dis_filt); #endif +#if !VIF_OPT_ENABLE num_array_adj = ADJUST(num_array); den_array_adj = ADJUST(den_array); +#endif #undef ADJUST #ifdef VIF_OPT_DEBUG_DUMP @@ -237,15 +295,6 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride sprintf(pathbuf, "stage/mu2[%d].bin", scale); write_image(pathbuf, mu2_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); - sprintf(pathbuf, "stage/mu1_sq[%d].bin", scale); - write_image(pathbuf, mu1_sq_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/mu2_sq[%d].bin", scale); - write_image(pathbuf, mu2_sq_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/mu1_mu2[%d].bin", scale); - write_image(pathbuf, mu1_mu2_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); - sprintf(pathbuf, "stage/ref_sq_filt[%d].bin", scale); write_image(pathbuf, ref_sq_filt_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); @@ -262,8 +311,13 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride write_image(pathbuf, den_array_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); #endif +#if VIF_OPT_ENABLE + num = *num_array; + den = *den_array; +#else num = vif_sum(num_array_adj, buf_valid_w, buf_valid_h, buf_stride); den = vif_sum(den_array_adj, buf_valid_w, buf_valid_h, buf_stride); +#endif scores[2*scale] = num; scores[2*scale+1] = den; diff --git a/feature/src/vif_options.h b/feature/src/vif_options.h index ef1f93b2e..61fe2ae49 100644 --- a/feature/src/vif_options.h +++ b/feature/src/vif_options.h @@ -36,4 +36,10 @@ /* Whether to use a 1-D formulation of the Gaussian filter. 
*/ #define VIF_OPT_FILTER_1D +/* VIF optimizations are enabled only for ID filter */ +#ifdef VIF_OPT_FILTER_1D +#define VIF_OPT_ENABLE 1 +#else +#define VIF_OPT_ENABLE 0 +#endif #endif /* VIF_OPTIONS_H_ */ diff --git a/feature/src/vif_tools.c b/feature/src/vif_tools.c index 8fae514e2..8f753fec5 100644 --- a/feature/src/vif_tools.c +++ b/feature/src/vif_tools.c @@ -23,6 +23,8 @@ #include #include "common/alloc.h" #include "vif_options.h" +#include "convolution.h" +#include "convolution_internal.h" #include "vif_tools.h" #include "common/cpu.h" @@ -211,6 +213,76 @@ void vif_xx_yy_xy_s(const float *x, const float *y, float *xx, float *yy, float } } +#if VIF_OPT_ENABLE +void vif_statistic_s(const float *mu1, const float *mu2, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den, + int w, int h, int mu1_stride, int mu2_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride) +{ + static const float sigma_nsq = 2; + static const float sigma_max_inv = 4.0 / (255.0*255.0); + + int mu1_px_stride = mu1_stride / sizeof(float); + int mu2_px_stride = mu2_stride / sizeof(float); + int mu1_mu2_px_stride = mu1_mu2_stride / sizeof(float); + int xx_filt_px_stride = xx_filt_stride / sizeof(float); + int yy_filt_px_stride = yy_filt_stride / sizeof(float); + int xy_filt_px_stride = xy_filt_stride / sizeof(float); + int num_px_stride = num_stride / sizeof(float); + int den_px_stride = den_stride / sizeof(float); + + float mu1_sq_val, mu2_sq_val, mu1_mu2_val, xx_filt_val, yy_filt_val, xy_filt_val; + float sigma1_sq, sigma2_sq, sigma12, g, sv_sq; + float num_val, den_val; + int i, j; + + float accum_num = 0.0; + float accum_den = 0.0; + + for (i = 0; i < h; ++i) { + float accum_inner_num = 0; + float accum_inner_den = 0; + for (j = 0; j < w; ++j) { + float mu1_val = mu1[i * mu1_px_stride + j]; + float mu2_val = mu2[i * mu2_px_stride + j]; + mu1_sq_val = mu1_val * 
mu1_val; // same name as the Matlab code vifp_mscale.m + mu2_sq_val = mu2_val * mu2_val; + mu1_mu2_val = mu1_val * mu2_val; //mu1_mu2[i * mu1_mu2_px_stride + j]; + xx_filt_val = xx_filt[i * xx_filt_px_stride + j]; + yy_filt_val = yy_filt[i * yy_filt_px_stride + j]; + xy_filt_val = xy_filt[i * xy_filt_px_stride + j]; + + sigma1_sq = xx_filt_val - mu1_sq_val; + sigma2_sq = yy_filt_val - mu2_sq_val; + sigma12 = xy_filt_val - mu1_mu2_val; + + if (sigma1_sq < sigma_nsq) { + num_val = 1.0 - sigma2_sq * sigma_max_inv; + den_val = 1.0; + } + else { + sv_sq = (sigma2_sq + sigma_nsq) * sigma1_sq; + if (sigma12 < 0) + { + num_val = 0.0; + } + else + { + g = sv_sq - sigma12 * sigma12; + num_val = log2f(sv_sq / g); + } + den_val = log2f(1.0f + sigma1_sq / sigma_nsq); + } + + accum_inner_num += num_val; + accum_inner_den += den_val; + } + + accum_num += accum_inner_num; + accum_den += accum_inner_den; + } + num[0] = accum_num; + den[0] = accum_den; +} +#else void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den, int w, int h, int mu1_sq_stride, int mu2_sq_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride) { @@ -267,6 +339,7 @@ void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_ } } } +#endif void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth) { @@ -329,6 +402,136 @@ void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, aligned_free(tmp); } +#if VIF_OPT_ENABLE +// Code optimized by adding intrinsic code for the functions, +// vif_filter1d_sq and vif_filter1d_sq + +void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth) +{ + + int src_px_stride = src_stride / 
sizeof(float); + int dst_px_stride = dst_stride / sizeof(float); + + /* if support avx */ + + if (cpu >= VMAF_CPU_AVX) + { + convolution_f32_avx_sq_s(f, fwidth, src, dst, tmpbuf, w, h, src_px_stride, dst_px_stride); + return; + } + + /* fall back */ + + float *tmp = aligned_malloc(ALIGN_CEIL(w * sizeof(float)), MAX_ALIGN); + float fcoeff, imgcoeff; + + int i, j, fi, fj, ii, jj; + + for (i = 0; i < h; ++i) { + /* Vertical pass. */ + for (j = 0; j < w; ++j) { + float accum = 0; + + for (fi = 0; fi < fwidth; ++fi) { + fcoeff = f[fi]; + + ii = i - fwidth / 2 + fi; + ii = ii < 0 ? -ii : (ii >= h ? 2 * h - ii - 1 : ii); + + imgcoeff = src[ii * src_px_stride + j]; + + accum += fcoeff * (imgcoeff * imgcoeff); + } + + tmp[j] = accum; + } + + /* Horizontal pass. */ + for (j = 0; j < w; ++j) { + float accum = 0; + + for (fj = 0; fj < fwidth; ++fj) { + fcoeff = f[fj]; + + jj = j - fwidth / 2 + fj; + jj = jj < 0 ? -jj : (jj >= w ? 2 * w - jj - 1 : jj); + + imgcoeff = tmp[jj]; + + accum += fcoeff * imgcoeff; + } + + dst[i * dst_px_stride + j] = accum; + } + } + + aligned_free(tmp); +} + +void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, float *dst, float *tmpbuf, int w, int h, int src1_stride, int src2_stride, int dst_stride, int fwidth) +{ + + int src1_px_stride = src1_stride / sizeof(float); + int src2_px_stride = src2_stride / sizeof(float); /* src2 rows are stepped with src2's own stride, not src1's */ + int dst_px_stride = dst_stride / sizeof(float); + + /* if support avx */ + + if (cpu >= VMAF_CPU_AVX) + { + convolution_f32_avx_xy_s(f, fwidth, src1, src2, dst, tmpbuf, w, h, src1_px_stride, src2_px_stride, dst_px_stride); + return; + } + + /* fall back */ + + float *tmp = aligned_malloc(ALIGN_CEIL(w * sizeof(float)), MAX_ALIGN); + float fcoeff, imgcoeff, imgcoeff1, imgcoeff2; + + int i, j, fi, fj, ii, jj; + + for (i = 0; i < h; ++i) { + /* Vertical pass. */ + for (j = 0; j < w; ++j) { + float accum = 0; + + for (fi = 0; fi < fwidth; ++fi) { + fcoeff = f[fi]; + + ii = i - fwidth / 2 + fi; + ii = ii < 0 ? 
-ii : (ii >= h ? 2 * h - ii - 1 : ii); + + imgcoeff1 = src1[ii * src1_px_stride + j]; + imgcoeff2 = src2[ii * src2_px_stride + j]; + + accum += fcoeff * (imgcoeff1 * imgcoeff2); + } + + tmp[j] = accum; + } + + /* Horizontal pass. */ + for (j = 0; j < w; ++j) { + float accum = 0; + + for (fj = 0; fj < fwidth; ++fj) { + fcoeff = f[fj]; + + jj = j - fwidth / 2 + fj; + jj = jj < 0 ? -jj : (jj >= w ? 2 * w - jj - 1 : jj); + + imgcoeff = tmp[jj]; + + accum += fcoeff * imgcoeff; + } + + dst[i * dst_px_stride + j] = accum; + } + } + + aligned_free(tmp); +} +#endif void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth) { diff --git a/feature/src/vif_tools.h b/feature/src/vif_tools.h index 60c56bc15..d2a1bd97d 100644 --- a/feature/src/vif_tools.h +++ b/feature/src/vif_tools.h @@ -43,6 +43,12 @@ void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_ void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth); +#if VIF_OPT_ENABLE +void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth); + +void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, float *dst, float *tmpbuf, int w, int h, int src1_stride, int src2_stride, int dst_stride, int fwidth); +#endif + void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth); #endif /* VIF_TOOLS_H_ */ diff --git a/wrapper/Makefile b/wrapper/Makefile index 7f7e81d27..931751584 100644 --- a/wrapper/Makefile +++ b/wrapper/Makefile @@ -9,6 +9,8 @@ OBJDIR = $(TOP)/obj FEATURESRCDIR = $(TOP)/../feature/src PTOOLSDIR = $(TOP)/../ptools INSTALL_PREFIX = /usr/local +INCLUDES += -I$(TOP)/../feature/src +INCLUDES += -I$(TOP)/../feature/src/common OBJS = \ $(OBJDIR)/alloc.o \ @@ -72,13 +74,13 @@ 
$(OBJDIR)/frame.o: $(FEATURESRCDIR)/common/frame.c $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< $(OBJDIR)/convolution.o: $(FEATURESRCDIR)/common/convolution.c - $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< + $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< $(OBJDIR)/cpu.o: $(FEATURESRCDIR)/common/cpu.c $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< $(OBJDIR)/convolution_avx.o: $(FEATURESRCDIR)/common/convolution_avx.c - $(CC) -c -o $@ $(EXTRA_CFLAGS) $(CFLAGS) $(CPPFLAGS) $< + $(CC) -c -o $@ $(EXTRA_CFLAGS) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< $(OBJDIR)/psnr_tools.o: $(FEATURESRCDIR)/psnr_tools.c $(CC) -c -o $@ $(EXTRA_CFLAGS) $(CFLAGS) $(CPPFLAGS) $< @@ -96,10 +98,10 @@ $(OBJDIR)/ansnr_tools.o: $(FEATURESRCDIR)/ansnr_tools.c $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< $(OBJDIR)/vif.o: $(FEATURESRCDIR)/vif.c - $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< + $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< $(OBJDIR)/vif_tools.o: $(FEATURESRCDIR)/vif_tools.c - $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< + $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< $(OBJDIR)/motion.o: $(FEATURESRCDIR)/motion.c $(CC) -c -o $@ $(CFLAGS) $(CPPFLAGS) $< diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c index 1bbe66890..c96c451b9 100644 --- a/wrapper/src/combo.c +++ b/wrapper/src/combo.c @@ -99,6 +99,10 @@ void* combo_threadfunc(void* vmaf_thread_data) int ret = 0; bool next_frame_read; +#if BUF_OPT_ENABLE + bool offset_flag; +#endif + #ifdef MULTI_THREADING float *prev_blur_buf_ = 0; float *ref_buf_ = 0; @@ -106,6 +110,7 @@ void* combo_threadfunc(void* vmaf_thread_data) float *blur_buf_ = 0; #endif +#if !BUF_OPT_ENABLE if (!(ref_buf = aligned_malloc(data_sz, MAX_ALIGN))) { sprintf(errmsg, "aligned_malloc failed for ref_buf.\n"); @@ -143,6 +148,7 @@ void* combo_threadfunc(void* vmaf_thread_data) sprintf(errmsg, "aligned_malloc failed for next_blur_buf.\n"); goto fail_or_end; } +#endif // use temp_buf for convolution_f32_c, and fread u and v if (!(temp_buf = aligned_malloc(data_sz * 2, MAX_ALIGN))) @@ 
-173,6 +179,23 @@ void* combo_threadfunc(void* vmaf_thread_data) if (frm_idx == 0) { +#if BUF_OPT_ENABLE + // Allocating the free buffers from buffer array + blur_buf = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx); + ref_buf = get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx); + dis_buf = get_free_blur_buf_slot(&thread_data->dis_buf_array, frm_idx); + + if((NULL == blur_buf) || (NULL == ref_buf) || (NULL == dis_buf)) + { +#ifdef MULTI_THREADING + thread_data->stop_threads = 1; + sprintf(errmsg, "No free slot found for buffer allocation.\n"); + pthread_mutex_unlock(&thread_data->mutex_readframe); +#endif + goto fail_or_end; + } +#endif + // read frame from file ret = thread_data->read_frame(ref_buf, dis_buf, temp_buf, stride, user_data); @@ -208,13 +231,31 @@ void* combo_threadfunc(void* vmaf_thread_data) convolution_f32_c(FILTER_5, 5, ref_buf, blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float)); #ifdef MULTI_THREADING +#if !BUF_OPT_ENABLE put_blur_buf(&thread_data->blur_buf_array, frm_idx, blur_buf); +#endif #endif } #ifdef MULTI_THREADING else { +#if BUF_OPT_ENABLE + // retrieve from buffer array + ref_buf = get_blur_buf(&thread_data->ref_buf_array, frm_idx); + dis_buf = get_blur_buf(&thread_data->dis_buf_array, frm_idx); + blur_buf = get_blur_buf(&thread_data->blur_buf_array, frm_idx); + + if((NULL == ref_buf) || (NULL == dis_buf) || (NULL == blur_buf)) + { +#ifdef MULTI_THREADING + thread_data->stop_threads = 1; + sprintf(errmsg, "Data not available.\n"); + pthread_mutex_unlock(&thread_data->mutex_readframe); +#endif + goto fail_or_end; + } +#else // retrieve from buffer array ref_buf_ = get_blur_buf(&thread_data->ref_buf_array, frm_idx); @@ -228,6 +269,22 @@ void* combo_threadfunc(void* vmaf_thread_data) blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx); memcpy(blur_buf, blur_buf_, data_sz); // don't releave blur_buf_array of frm_idx yet, since it will be used by the next frame again +#endif + } 
+#endif + +#if BUF_OPT_ENABLE + // Allocate free buffer from the buffer array for next frame index + next_ref_buf = get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx + 1); + next_dis_buf = get_free_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1); + if((NULL == next_ref_buf) || (NULL == next_dis_buf)) + { +#ifdef MULTI_THREADING + thread_data->stop_threads = 1; + sprintf(errmsg, "No free slot found for next buffer.\n"); + pthread_mutex_unlock(&thread_data->mutex_readframe); +#endif + goto fail_or_end; } #endif @@ -252,12 +309,25 @@ void* combo_threadfunc(void* vmaf_thread_data) next_frame_read = true; } +#if !BUF_OPT_ENABLE #ifdef MULTI_THREADING pthread_mutex_unlock(&thread_data->mutex_readframe); +#endif #endif if (next_frame_read) { +#if BUF_OPT_ENABLE + next_blur_buf = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx + 1); + if(NULL == next_blur_buf) + { +#ifdef MULTI_THREADING + thread_data->stop_threads = 1; + sprintf(errmsg, "No free slot found for blur buffer.\n"); +#endif + goto fail_or_end; + } +#endif // =============================================================== // offset pixel by OPT_RANGE_PIXEL_OFFSET // =============================================================== @@ -272,14 +342,24 @@ void* combo_threadfunc(void* vmaf_thread_data) // =============================================================== convolution_f32_c(FILTER_5, 5, next_ref_buf, next_blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float)); +#if !BUF_OPT_ENABLE #ifdef MULTI_THREADING // save next_ref_buf, next_ref_buf and next_ref_buf to buffer array put_blur_buf(&thread_data->ref_buf_array, frm_idx + 1, next_ref_buf); put_blur_buf(&thread_data->dis_buf_array, frm_idx + 1, next_dis_buf); put_blur_buf(&thread_data->blur_buf_array, frm_idx + 1, next_blur_buf); +#endif #endif } +#if BUF_OPT_ENABLE + // release ref and dis buffer references after blur buf computation + release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx + 1); + 
release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx + 1); +#ifdef MULTI_THREADING + pthread_mutex_unlock(&thread_data->mutex_readframe); +#endif +#endif dbg_printf("frame: %d, ", frm_idx); // =============================================================== @@ -287,9 +367,18 @@ void* combo_threadfunc(void* vmaf_thread_data) // step they have been offset by OPT_RANGE_PIXEL_OFFSET, now // offset them back. // =============================================================== +#if BUF_OPT_ENABLE + // offset back the buffers only if required + if (frm_idx % n_subsample == 0 && ( (thread_data->psnr_array != NULL) || (thread_data->ssim_array != NULL) || (thread_data->ms_ssim_array != NULL) )) + { + offset_image(ref_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); + offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); + offset_flag = true; + } +#else offset_image(ref_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); - +#endif if (frm_idx % n_subsample == 0 && thread_data->psnr_array != NULL) { /* =========== psnr ============== */ @@ -338,8 +427,17 @@ void* combo_threadfunc(void* vmaf_thread_data) // =============================================================== // for the rest, offset pixel by OPT_RANGE_PIXEL_OFFSET // =============================================================== +#if BUF_OPT_ENABLE + if(offset_flag) + { + offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); + offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); + offset_flag = false; + } +#else offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); +#endif /* =========== adm ============== */ if (frm_idx % n_subsample == 0) @@ -421,8 +519,19 @@ void* combo_threadfunc(void* vmaf_thread_data) else { #ifdef MULTI_THREADING +#if BUF_OPT_ENABLE + // avoid multiple memory copies + prev_blur_buf = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); + 
if(NULL == prev_blur_buf) + { + thread_data->stop_threads = 1; + sprintf(errmsg, "Data not available for prev_blur_buf.\n"); + goto fail_or_end; + } +#else prev_blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); memcpy(prev_blur_buf, prev_blur_buf_, data_sz); +#endif #endif if ((ret = compute_motion(prev_blur_buf, blur_buf, w, h, stride, stride, &score))) { @@ -430,7 +539,11 @@ void* combo_threadfunc(void* vmaf_thread_data) goto fail_or_end; } #ifdef MULTI_THREADING +#if BUF_OPT_ENABLE + release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx - 1); +#else release_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); +#endif #endif if (next_frame_read) @@ -446,7 +559,9 @@ void* combo_threadfunc(void* vmaf_thread_data) { score2 = score; #ifdef MULTI_THREADING +#if !BUF_OPT_ENABLE release_blur_buf(&thread_data->blur_buf_array, frm_idx); // no more next frames, release this one too +#endif #endif } } @@ -458,6 +573,7 @@ void* combo_threadfunc(void* vmaf_thread_data) insert_array_at(thread_data->motion2_array, score2, frm_idx); } +#if !BUF_OPT_ENABLE else { #ifdef MULTI_THREADING @@ -473,6 +589,12 @@ void* combo_threadfunc(void* vmaf_thread_data) } #endif } +#else + /* Indicate that motion score computation for this frame is complete */ + insert_array_at(thread_data->motion_score_compute_flag_array, 1.0, frm_idx); + release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx + 1); +#endif + /* =========== vif ============== */ if (frm_idx % n_subsample == 0) @@ -508,12 +630,60 @@ void* combo_threadfunc(void* vmaf_thread_data) dbg_printf("\n"); +#if BUF_OPT_ENABLE + //Release references to reference and distorted buffers + release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx); + release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx); + release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx); + /*Loop through the slots and release slots if there are no more + reference till the current index. 
Not releasing next frame as + it may be required for the next loop */ + for(int i = 0; i <= frm_idx; i++) + { + int ref_reference_count = get_blur_buf_reference_count(&thread_data->ref_buf_array, i); + int dis_reference_count = get_blur_buf_reference_count(&thread_data->dis_buf_array, i); + + if((ref_reference_count == 0) && (dis_reference_count == 0)) + { + release_blur_buf_slot(&thread_data->ref_buf_array, i); + release_blur_buf_slot(&thread_data->dis_buf_array, i); + } + } + + /* Loop through the blur buffer array and release slots only till current index - 1 */ + /* Only for those whose reference counter is zero */ + for(int i = 0; i <= (frm_idx - 1); i++) + { + int reference_count = get_blur_buf_reference_count(&thread_data->blur_buf_array, i); + if(reference_count == 0) + { + /* Release buffer only if motion score is computed for current, previous and next frame */ + if( + (get_at(thread_data->motion_score_compute_flag_array, i)) && + (get_at(thread_data->motion_score_compute_flag_array, i + 1)) && + ((i == 0) || (get_at(thread_data->motion_score_compute_flag_array, i - 1))) + ) + { + release_blur_buf_slot(&thread_data->blur_buf_array, i); + } + } + } + + /* If this is the last frame then release any subsequent slots */ + if (!next_frame_read) + { + release_blur_buf_slot(&thread_data->ref_buf_array, frm_idx + 1); + release_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1); + release_blur_buf_slot(&thread_data->blur_buf_array, frm_idx); + } +#else #ifndef MULTI_THREADING // copy to prev_buf memcpy(prev_blur_buf, blur_buf, data_sz); memcpy(ref_buf, next_ref_buf, data_sz); memcpy(dis_buf, next_dis_buf, data_sz); memcpy(blur_buf, next_blur_buf, data_sz); +#endif #endif if (!next_frame_read) @@ -528,6 +698,7 @@ void* combo_threadfunc(void* vmaf_thread_data) fail_or_end: +#if !BUF_OPT_ENABLE aligned_free(ref_buf); aligned_free(dis_buf); aligned_free(prev_blur_buf); @@ -535,6 +706,7 @@ void* combo_threadfunc(void* vmaf_thread_data) aligned_free(next_dis_buf); 
aligned_free(next_blur_buf); aligned_free(blur_buf); +#endif aligned_free(temp_buf); #ifdef MULTI_THREADING @@ -616,6 +788,12 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, combo_thread_data.stop_threads = 0; combo_thread_data.n_subsample = n_subsample; +#if BUF_OPT_ENABLE + DArray motion_score_compute_flag_array; + init_array(&motion_score_compute_flag_array, 1000); + combo_thread_data.motion_score_compute_flag_array = &motion_score_compute_flag_array; +#endif + // sanity check for width/height if (w <= 0 || h <= 0 || (size_t)w > ALIGN_FLOOR(INT_MAX) / sizeof(float)) { @@ -649,10 +827,22 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, } // for motion analysis we compare to previous buffer and next buffer +#if BUF_OPT_ENABLE + /* + * In the multi-thread mode, allocate a fixed size buffer pool for the reference, distorted and blur buffers. + * At any point, the no. of required ref and dis buffers is 1 more than the total no. of allotted threads, + to accomodate reading the next frame index. + * At any point, one thread operates on the current, previous and next blur buffers, and hence, the no. of + required blur buffers will be three times the total no. of allotted threads. 
+ */ + init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN); + init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN); + init_blur_array(&combo_thread_data.blur_buf_array, 3 * (combo_thread_data.thread_count), combo_thread_data.data_sz, MAX_ALIGN); +#else init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN); init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN); init_blur_array(&combo_thread_data.blur_buf_array, combo_thread_data.thread_count + 2, combo_thread_data.data_sz, MAX_ALIGN); - +#endif // initialize the mutex that protects the read_frame function pthread_mutex_init(&combo_thread_data.mutex_readframe, NULL); @@ -663,9 +853,9 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, // start threads int t; - int numThread = combo_thread_data.thread_count; - pthread_t* thread = (pthread_t*)calloc(numThread, sizeof(pthread_t)); - memset(thread, 0, numThread * sizeof(pthread_t)); + int numThread = combo_thread_data.thread_count; + pthread_t* thread = (pthread_t*)calloc(numThread, sizeof(pthread_t)); + memset(thread, 0, numThread * sizeof(pthread_t)); for (t=0; t < combo_thread_data.thread_count; t++) { @@ -691,7 +881,12 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, free_blur_buf(&combo_thread_data.dis_buf_array); free_blur_buf(&combo_thread_data.blur_buf_array); - free(thread); +#if BUF_OPT_ENABLE + free_array(&motion_score_compute_flag_array); +#endif + + free(thread); + return 0; } diff --git a/wrapper/src/combo.h b/wrapper/src/combo.h index a0558e500..1b335cc64 100644 --- a/wrapper/src/combo.h +++ b/wrapper/src/combo.h @@ -78,6 +78,9 @@ typedef struct BLUR_BUF_ARRAY blur_buf_array; BLUR_BUF_ARRAY ref_buf_array; BLUR_BUF_ARRAY 
dis_buf_array; +#if BUF_OPT_ENABLE + DArray *motion_score_compute_flag_array; +#endif #endif int ret; diff --git a/wrapper/src/darray.c b/wrapper/src/darray.c index c9ab7c6aa..a0fcf0a1e 100644 --- a/wrapper/src/darray.c +++ b/wrapper/src/darray.c @@ -18,10 +18,14 @@ #include #include "darray.h" +#include "common/blur_array.h" void init_array(DArray *a, size_t init_size) { a->array = (double *)malloc(init_size * sizeof(double)); +#if BUF_OPT_ENABLE + memset(a->array, 0.0, init_size * sizeof(double)); +#endif a->used = 0; a->size = init_size; #ifdef MULTI_THREADING @@ -37,6 +41,12 @@ void insert_array(DArray *a, double e) if (a->used == a->size) { a->size *= 2; +#if BUF_OPT_ENABLE + double *temp; + temp = a->array; + temp += (a->size / 2); + memset(temp, 0.0, (a->size / 2) * sizeof(double)); +#endif a->array = (double *)realloc(a->array, a->size * sizeof(double)); } a->array[a->used++] = e; @@ -59,6 +69,12 @@ void insert_array_at(DArray *a, double e, int pos) { a->size *= 2; a->array = (double *)realloc(a->array, a->size * sizeof(double)); +#if BUF_OPT_ENABLE + double *temp; + temp = a->array; + temp += (a->size / 2); + memset(temp, 0.0, (a->size / 2) * sizeof(double)); +#endif } a->array[pos] = e; #ifdef MULTI_THREADING diff --git a/wrapper/src/libvmaf.h b/wrapper/src/libvmaf.h index cffdef4e2..22dbc039f 100644 --- a/wrapper/src/libvmaf.h +++ b/wrapper/src/libvmaf.h @@ -19,6 +19,15 @@ #ifndef LIBVMAF_H_ #define LIBVMAF_H_ +#ifndef WINCE +#define TIME_TEST_ENABLE 1 // 1: memory leak test enable 0: disable +#define MEM_LEAK_TEST_ENABLE 0 // prints execution time in xml log when enabled. +#else +//For Windows memory leak test and execution time test cases are not handled. 
+#define TIME_TEST_ENABLE 0 +#define MEM_LEAK_TEST_ENABLE 0 +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/wrapper/src/main.cpp b/wrapper/src/main.cpp index c42879e04..7c714d331 100644 --- a/wrapper/src/main.cpp +++ b/wrapper/src/main.cpp @@ -56,6 +56,54 @@ void print_usage(int argc, char *argv[]) fprintf(stderr, "n_subsample:\n\tn indicates computing on one of every n frames (default 1)\n\n"); } +#if MEM_LEAK_TEST_ENABLE +/* + * Measures the current (and peak) resident and virtual memories + * usage of your linux C process, in kB + */ +void getMemory(int itr_ctr, int state) +{ + int currRealMem; + int peakRealMem; + int currVirtMem; + int peakVirtMem; + char state_str[10]=""; + // stores each word in status file + char buffer[1024] = ""; + + if(state ==1) + strcpy(state_str,"start"); + else + strcpy(state_str,"end"); + + // linux file contains this-process info + FILE* file = fopen("/proc/self/status", "r"); + + // read the entire file + while (fscanf(file, " %1023s", buffer) == 1) + { + if (strcmp(buffer, "VmRSS:") == 0) + { + fscanf(file, " %d", &currRealMem); + } + if (strcmp(buffer, "VmHWM:") == 0) + { + fscanf(file, " %d", &peakRealMem); + } + if (strcmp(buffer, "VmSize:") == 0) + { + fscanf(file, " %d", &currVirtMem); + } + if (strcmp(buffer, "VmPeak:") == 0) + { + fscanf(file, " %d", &peakVirtMem); + } + } + fclose(file); + printf("Iteration %d at %s of process: currRealMem: %6d, peakRealMem: %6d, currVirtMem: %6d, peakVirtMem: %6d\n",itr_ctr, state_str, currRealMem, peakRealMem, currVirtMem, peakVirtMem); +} +#endif + int run_wrapper(char *fmt, int width, int height, char *ref_path, char *dis_path, char *model_path, char *log_path, char *log_fmt, bool disable_clip, bool disable_avx, bool enable_transform, bool phone_model, bool do_psnr, bool do_ssim, bool do_ms_ssim, char *pool_method, int n_thread, int n_subsample, bool enable_conf_interval) @@ -154,7 +202,10 @@ int main(int argc, char *argv[]) int n_subsample = 1; bool enable_conf_interval 
= false; char *temp; - +#if MEM_LEAK_TEST_ENABLE + int itr_ctr; + int ret = 0; +#endif /* Check parameters */ if (argc < 7) @@ -288,9 +339,20 @@ int main(int argc, char *argv[]) try { +#if MEM_LEAK_TEST_ENABLE + for(itr_ctr=0;itr_ctr<1000;itr_ctr++) + { + getMemory(itr_ctr,1); + ret = run_wrapper(fmt, width, height, ref_path, dis_path, model_path, + log_path, log_fmt, disable_clip, disable_avx, enable_transform, phone_model, + do_psnr, do_ssim, do_ms_ssim, pool_method, n_thread, n_subsample, enable_conf_interval); + getMemory(itr_ctr,2); + } +#else return run_wrapper(fmt, width, height, ref_path, dis_path, model_path, log_path, log_fmt, disable_clip, disable_avx, enable_transform, phone_model, do_psnr, do_ssim, do_ms_ssim, pool_method, n_thread, n_subsample, enable_conf_interval); +#endif } catch (const std::exception &e) { diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp index 8f50c47b4..a26c3a57f 100644 --- a/wrapper/src/vmaf.cpp +++ b/wrapper/src/vmaf.cpp @@ -25,6 +25,7 @@ #include #include #include +#include "libvmaf.h" #include "vmaf.h" #include "combo.h" @@ -1003,6 +1004,9 @@ double RunVmaf(const char* fmt, int width, int height, size_t num_frames_subsampled = result.get_scores("vmaf").size(); double aggregate_vmaf = result.get_score("vmaf"); double exec_fps = (double)num_frames_subsampled * n_subsample / (double)timer.elapsed(); +#if TIME_TEST_ENABLE + double time_taken = (double)timer.elapsed(); +#endif printf("Exec FPS: %f\n", exec_fps); std::vector result_keys = result.get_keys(); @@ -1138,6 +1142,9 @@ double RunVmaf(const char* fmt, int width, int height, if (aggregate_ms_ssim) info_node.append_attribute("aggregateMS_SSIM") = aggregate_ms_ssim; info_node.append_attribute("execFps") = exec_fps; +#if TIME_TEST_ENABLE + info_node.append_attribute("timeTaken") = time_taken; +#endif auto frames_node = xml_root.append_child("frames"); for (size_t i_subsampled=0; i_subsampled Date: Wed, 30 Jan 2019 09:11:40 +0800 Subject: [PATCH 11/29] Netflix/vmaf/#286 
(#293) * Netflix/vmaf/#286 refactor for provide Result in libvmaf.h * fix bug: crash while predictionStructs empty * format code and activate the CI --- wrapper/src/libvmaf.cpp | 271 +++++++++++++++++++++++++++++++++++----- wrapper/src/libvmaf.h | 79 ++++++++++++ wrapper/src/vmaf.cpp | 15 +-- wrapper/src/vmaf.h | 156 +---------------------- 4 files changed, 324 insertions(+), 197 deletions(-) diff --git a/wrapper/src/libvmaf.cpp b/wrapper/src/libvmaf.cpp index 4b7672a76..e8e09de30 100644 --- a/wrapper/src/libvmaf.cpp +++ b/wrapper/src/libvmaf.cpp @@ -21,41 +21,245 @@ #include #include "cpu.h" +Asset::Asset(int w, int h, const char *fmt) + :w(w), h(h), fmt(fmt) +{ +} + +Asset::Asset(int w, int h) + :w(w), h(h), fmt("yuv420p") +{ +} + +int Asset::getWidth() +{ + return w; +} + +int Asset::getHeight() +{ + return h; +} + +const char* Asset::getFmt() +{ + return fmt; +} + +StatVector::StatVector() +{ +} + +StatVector::StatVector(std::vector l) : l(l) +{ +} + +std::vector StatVector::getVector() +{ + return l; +} + +double StatVector::mean() +{ + _assert_size(); + double sum = 0.0; + for (double e : l) + { + sum += e; + } + return sum / l.size(); +} + +double StatVector::minimum() +{ + _assert_size(); + double min_ = l[0]; + for (double e : l) + { + if (e < min_) + { + min_ = e; + } + } + return min_; +} + +double StatVector::harmonic_mean() +{ + _assert_size(); + double sum = 0.0; + for (double e : l) + { + sum += 1.0 / (e + 1.0); + } + return 1.0 / (sum / l.size()) - 1.0; +} + +double StatVector::second_moment() +{ + _assert_size(); + double sum = 0.0; + for (double e : l) + { + sum += pow(e, 2); + } + return sum / l.size(); +} + +double StatVector::percentile(double perc) +{ + _assert_size(); + if (perc < 0.0) { + perc = 0.0; + } + else if (perc > 100.0) { + perc = 100.0; + } + std::vector l(this->l); + std::sort(l.begin(), l.end()); + double pos = perc * (this->l.size() - 1) / 100.0; + int pos_left = (int)floor(pos); + int pos_right = (int)ceil(pos); + if (pos_left 
== pos_right) { + return l[pos_left]; + } + else { + return l[pos_left] * (pos_right - pos) + l[pos_right] * (pos - pos_left); + } + +} + +double StatVector::var() +{ + return second_moment() - pow(mean(), 2); +} + +double StatVector::std() +{ + return sqrt(var()); +} + +void StatVector::append(double e) +{ + l.push_back(e); +} +double StatVector::at(size_t idx) +{ + return l.at(idx); +} + +size_t StatVector::size() +{ + return l.size(); +} + +void StatVector::_assert_size() +{ + if (l.size() == 0) { + throw std::runtime_error("StatVector size is 0."); + } +} + +Result::Result() : score_aggregate_method(ScoreAggregateMethod::MEAN) +{ +} + +void Result::set_scores(const std::string &key, const StatVector &scores) +{ + d[key] = scores; +} + +StatVector Result::get_scores(const std::string &key) +{ + return d[key]; +} + +bool Result::has_scores(const std::string &key) +{ + return d.find(key) != d.end(); +} + +double Result::get_score(const std::string &key) +{ + StatVector list = get_scores(key); + if (score_aggregate_method == ScoreAggregateMethod::MINIMUM) + { + return list.minimum(); + } + else if (score_aggregate_method == ScoreAggregateMethod::HARMONIC_MEAN) + { + return list.harmonic_mean(); + } + else // MEAN + { + return list.mean(); + } +} + +std::vector Result::get_keys() +{ + std::vector v; + for (std::map::iterator it = d.begin(); it != d.end(); ++it) + { + v.push_back(it->first); + } + return v; +} + +void Result::setScoreAggregateMethod(ScoreAggregateMethod scoreAggregateMethod) +{ + score_aggregate_method = scoreAggregateMethod; +} + +std::unique_ptr +VmafQualityRunnerFactory::createVmafQualityRunner(const char *model_path, bool enable_conf_interval) { + std::unique_ptr runner_ptr; + if (enable_conf_interval) + { + runner_ptr = std::unique_ptr(new BootstrapVmafQualityRunner(model_path)); + } + else + { + runner_ptr = std::unique_ptr(new VmafQualityRunner(model_path)); + } + return runner_ptr; +} + extern "C" { -enum vmaf_cpu cpu; // global - -int 
compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride_byte, void *user_data), - void *user_data, char *model_path, char *log_path, char *log_fmt, int disable_clip, int disable_avx, int enable_transform, int phone_model, int do_psnr, - int do_ssim, int do_ms_ssim, char *pool_method, int n_thread, int n_subsample, int enable_conf_interval) - { - bool d_c = false; - bool d_a = false; - bool e_t = false; - bool d_p = false; - bool d_s = false; - bool d_m_s = false; - - if(enable_transform || phone_model){ - e_t = true; - } - if(disable_clip){ - d_c = true; - } - if(disable_avx){ - d_a = true; - } - if(do_psnr){ - d_p = true; - } - if(do_ssim){ - d_s = true; - } - if(do_ms_ssim){ - d_m_s = true; - } - - cpu = cpu_autodetect(); + enum vmaf_cpu cpu; // global + + int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int(*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride_byte, void *user_data), + void *user_data, char *model_path, char *log_path, char *log_fmt, int disable_clip, int disable_avx, int enable_transform, int phone_model, int do_psnr, + int do_ssim, int do_ms_ssim, char *pool_method, int n_thread, int n_subsample, int enable_conf_interval) + { + bool d_c = false; + bool d_a = false; + bool e_t = false; + bool d_p = false; + bool d_s = false; + bool d_m_s = false; + + if (enable_transform || phone_model) { + e_t = true; + } + if (disable_clip) { + d_c = true; + } + if (disable_avx) { + d_a = true; + } + if (do_psnr) { + d_p = true; + } + if (do_ssim) { + d_s = true; + } + if (do_ms_ssim) { + d_m_s = true; + } + + cpu = cpu_autodetect(); if (disable_avx) { @@ -83,5 +287,4 @@ int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int (*rea return -4; } } - } diff --git a/wrapper/src/libvmaf.h b/wrapper/src/libvmaf.h index 22dbc039f..71c82f69e 100644 --- a/wrapper/src/libvmaf.h +++ b/wrapper/src/libvmaf.h @@ -40,4 
+40,83 @@ int compute_vmaf(double* vmaf_score, char* fmt, int width, int height, int (*rea } #endif +#ifdef __cplusplus +#include +#include +#include +#include + +class Asset +{ +public: + Asset(int w, int h, const char *fmt); + Asset(int w, int h); + int getWidth(); + int getHeight(); + const char* getFmt(); +private: + const int w, h; + const char *fmt; +}; + +enum ScoreAggregateMethod +{ + MEAN, + HARMONIC_MEAN, + MINIMUM +}; + +class StatVector +{ +public: + StatVector(); + StatVector(std::vector l); + std::vector getVector(); + double mean(); + double minimum(); + double harmonic_mean(); + double second_moment(); + double percentile(double perc); + double var(); + double std(); + void append(double e); + double at(size_t idx); + size_t size(); +private: + std::vector l; + void _assert_size(); +}; + + +class Result +{ +public: + Result(); + void set_scores(const std::string &key, const StatVector &scores); + StatVector get_scores(const std::string &key); + bool has_scores(const std::string &key); + double get_score(const std::string &key); + std::vector get_keys(); + void setScoreAggregateMethod(ScoreAggregateMethod scoreAggregateMethod); +private: + std::map d; + ScoreAggregateMethod score_aggregate_method; +}; + +class IVmafQualityRunner { +public: + virtual Result run(Asset asset, int(*read_frame)(float *ref_data, float *main_data, float *temp_data, + int stride, void *user_data), void *user_data, bool disable_clip, bool enable_transform, + bool do_psnr, bool do_ssim, bool do_ms_ssim, int n_thread, int n_subsample) = 0; + virtual ~IVmafQualityRunner() {} +}; + +class VmafQualityRunnerFactory { +public: + static std::unique_ptr + createVmafQualityRunner(const char *model_path, bool enable_conf_interval); +}; + +#endif + #endif /* _LIBVMAF_H */ diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp index a26c3a57f..5972d9c99 100644 --- a/wrapper/src/vmaf.cpp +++ b/wrapper/src/vmaf.cpp @@ -929,7 +929,10 @@ void 
BootstrapVmafQualityRunner::_set_prediction_result( result.set_scores("ci95_high", ci95HighScore); // num_models is same across frames, so just use first frame length - size_t num_models = predictionStructs.at(0).vmafMultiModelPrediction.size(); + size_t num_models = 0; + if (predictionStructs.size() > 0) { + num_models = predictionStructs.at(0).vmafMultiModelPrediction.size(); + } std::vector perModelScore; // name of the vmaf bootstrap model, e.g. vmaf_0001 is the first one @@ -972,15 +975,7 @@ double RunVmaf(const char* fmt, int width, int height, } Asset asset(width, height, fmt); - std::unique_ptr runner_ptr; - if (enable_conf_interval) - { - runner_ptr = std::unique_ptr(new BootstrapVmafQualityRunner(model_path)); - } - else - { - runner_ptr = std::unique_ptr(new VmafQualityRunner(model_path)); - } + std::unique_ptr runner_ptr = VmafQualityRunnerFactory::createVmafQualityRunner(model_path, enable_conf_interval); Timer timer; timer.start(); diff --git a/wrapper/src/vmaf.h b/wrapper/src/vmaf.h index 0a1a934d6..9a710052c 100644 --- a/wrapper/src/vmaf.h +++ b/wrapper/src/vmaf.h @@ -35,6 +35,7 @@ #include "svm.h" #include "chooseser.h" #include "darray.h" +#include "libvmaf.h" static const std::string BOOSTRAP_VMAF_MODEL_PREFIX = "vmaf_"; @@ -45,157 +46,6 @@ double RunVmaf(const char* fmt, int width, int height, bool do_psnr, bool do_ssim, bool do_ms_ssim, const char *pool_method, int n_thread, int n_subsample, bool enable_conf_interval); -class Asset -{ -public: - Asset(int w, int h, const char *fmt): - w(w), h(h), fmt(fmt) {} - Asset(int w, int h): - w(w), h(h), fmt("yuv420p") {} - int getWidth() { return w; } - int getHeight() { return h; } - const char* getFmt() { return fmt; } -private: - const int w, h; - const char *fmt; -}; - -class StatVector -{ -public: - StatVector() {} - StatVector(std::vector l): l(l) {} - std::vector getVector() - { - return l; - } - double mean() - { - _assert_size(); - double sum = 0.0; - for (double e : l) - { - sum += e; - } - 
return sum / l.size(); - } - double minimum() - { - _assert_size(); - double min_ = l[0]; - for (double e : l) - { - if (e < min_) - { - min_ = e; - } - } - return min_; - } - double harmonic_mean() - { - _assert_size(); - double sum = 0.0; - for (double e: l) - { - sum += 1.0 / (e + 1.0); - } - return 1.0 / (sum / l.size()) - 1.0; - } - double second_moment() - { - _assert_size(); - double sum = 0.0; - for (double e : l) - { - sum += pow(e, 2); - } - return sum / l.size(); - } - double percentile(double perc) - { - _assert_size(); - if (perc < 0.0) { - perc = 0.0; - } - else if (perc > 100.0) { - perc = 100.0; - } - std::vector l(this->l); - std::sort(l.begin(), l.end()); - double pos = perc * (this->l.size() - 1) / 100.0; - int pos_left = (int)floor(pos); - int pos_right = (int)ceil(pos); - if (pos_left == pos_right) { - return l[pos_left]; - } - else { - return l[pos_left] * (pos_right - pos) + l[pos_right] * (pos - pos_left); - } - - } - double var() { return second_moment() - pow(mean(), 2); } - double std() { return sqrt(var()); } - void append(double e) { l.push_back(e); } - double at(size_t idx) { return l.at(idx); } - size_t size() { return l.size(); } -private: - std::vector l; - void _assert_size() { - if (l.size() == 0) { - throw std::runtime_error("StatVector size is 0."); - } - } -}; - -enum ScoreAggregateMethod -{ - MEAN, - HARMONIC_MEAN, - MINIMUM -}; - -class Result -{ -public: - Result(): score_aggregate_method(ScoreAggregateMethod::MEAN) {} - void set_scores(const std::string &key, const StatVector &scores) { d[key] = scores; } - StatVector get_scores(const std::string &key) { return d[key]; } - bool has_scores(const std::string &key) { return d.find(key) != d.end(); } - double get_score(const std::string &key) - { - StatVector list = get_scores(key); - if (score_aggregate_method == ScoreAggregateMethod::MINIMUM) - { - return list.minimum(); - } - else if (score_aggregate_method == ScoreAggregateMethod::HARMONIC_MEAN) - { - return 
list.harmonic_mean(); - } - else // MEAN - { - return list.mean(); - } - } - std::vector get_keys() - { - std::vector v; - for (std::map::iterator it = d.begin(); it != d.end(); ++it) - { - v.push_back(it->first); - } - return v; - } - void setScoreAggregateMethod(ScoreAggregateMethod scoreAggregateMethod) - { - score_aggregate_method = scoreAggregateMethod; - } -private: - std::map d; - ScoreAggregateMethod score_aggregate_method; -}; - class VmafException: public std::exception { public: @@ -267,11 +117,11 @@ class BootstrapLibsvmNusvrTrainTestModel: public LibsvmNusvrTrainTestModel { virtual void _assert_model_type(Val model_type); }; -class VmafQualityRunner +class VmafQualityRunner : public IVmafQualityRunner { public: VmafQualityRunner(const char *model_path): model_path(model_path) {} - Result run(Asset asset, int (*read_frame)(float *ref_data, float *main_data, float *temp_data, + virtual Result run(Asset asset, int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, bool disable_clip, bool enable_transform, bool do_psnr, bool do_ssim, bool do_ms_ssim, int n_thread, int n_subsample); virtual ~VmafQualityRunner() {} From 179156201c269aafdae51c98e9aae202ed220427 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 13:23:27 -0800 Subject: [PATCH 12/29] Update FAQ.md and libvmaf.md with information on using libvmaf with FFmpeg. --- FAQ.md | 4 ++-- README.md | 2 +- resource/doc/libvmaf.md | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/FAQ.md b/FAQ.md index 265b0ac2a..55aec45ff 100644 --- a/FAQ.md +++ b/FAQ.md @@ -56,12 +56,12 @@ A: This is due to the slightly different workflows used by `run_vmaf_training` a ### Q: How do I use VMAF with downscaled videos? -If you have a distorted video that was scaled down (e.g. for adaptive streaming) and want to calculate VMAF, you can use ffmpeg with `libvmaf` to perform the re-scaling for you. 
+If you have a distorted video that was scaled down (e.g. for adaptive streaming) and want to calculate VMAF, you can use FFmpeg with `libvmaf` to perform the re-scaling for you. For example, to upscale the distorted video to 1080p: ``` -ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920:1080[main];[main][1:v]libvmaf" -f null - +ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null - ``` This scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input video, `1:v`. diff --git a/README.md b/README.md index be7bc1eea..e9f7b7d4d 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ There are a number of ways one can use the package: - [VMAF Python library](resource/doc/VMAF_Python_library.md) offers full functionalities including running basic VMAF command line, running VMAF on a batch of video files, training and testing a VMAF model on video datasets, and visualization tools, etc. - [`vmafossexec` - a C++ "wrapper" executable](resource/doc/vmafossexec.md) offers running the prediction part of the algorithm in full, such that one can easily deploy VMAF in a production environment without needing to configure the Python dependencies. Additionally, `vmafossexec` offers a number of exclusive features, such as 1) speed optimization using multi-threading and skipping frames, 2) optionally computing PSNR, SSIM and MS-SSIM metrics in the output. - - [`libvmaf.a` - a static library](resource/doc/libvmaf.md) offers an interface to incorporate VMAF into your C/C++ code. Using this library, VMAF is now included as a filter in [FFmpeg](http://ffmpeg.org/) main branch, and can be configured using: `./configure --enable-libvmaf --enable-version3`. See [this](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for details. Using FFmpeg with `libvmaf` allows passing in compressed video bitstreams directly to VMAF. 
+ - [`libvmaf.a` - a static library](resource/doc/libvmaf.md) offers an interface to incorporate VMAF into your C/C++ code. Using this library, VMAF is now included as a filter in [FFmpeg](http://ffmpeg.org/) main branch, and can be configured using: `./configure --enable-libvmaf --enable-version3`. See [this](resource/doc/libvmaf.md#use-libvmaf-with-ffmpeg) section for details. Using FFmpeg with `libvmaf` allows passing in compressed video bitstreams directly to VMAF. - [VMAF Dockerfile](Dockerfile) generates a VMAF docker image from the [VMAF Python library](resource/doc/VMAF_Python_library.md). Refer to [this](resource/doc/docker.md) document for detailed usages. - Build VMAF on Windows: follow instructions on [this](resource/doc/BuildForWindows.md) page. diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md index 3dd2c2874..6a5fc895c 100644 --- a/resource/doc/libvmaf.md +++ b/resource/doc/libvmaf.md @@ -41,3 +41,21 @@ To uninstall the library run: make uninstall ``` +### Use libvmaf with FFmpeg + +After installing `libvmaf.a`, you can use it with FFmpeg. Under FFmpeg directory, configure, build and install FFmpeg with: + +``` +./configure --enable-libvmaf --enable-version3 +make install +``` + +Using FFmpeg with libvmaf is very powerful, as you can create complex filters to calculate VMAF directly on videos of different encoding formats and resolutions. For the best practices of computing VMAF at the right resolution, refer to our [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12). Below is an example on how you can compare a downscaled video with its original 1080p source: + +``` +ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null - +``` + +Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. 
The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods). + +See the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms. From 9f85d5aae7235be5cd3f41ddfc8adc8ae78c70a0 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 13:25:55 -0800 Subject: [PATCH 13/29] Update FAQ.md and libvmaf.md with information on using libvmaf with FFmpeg. --- resource/doc/libvmaf.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md index 6a5fc895c..46c3cd82f 100644 --- a/resource/doc/libvmaf.md +++ b/resource/doc/libvmaf.md @@ -53,7 +53,8 @@ make install Using FFmpeg with libvmaf is very powerful, as you can create complex filters to calculate VMAF directly on videos of different encoding formats and resolutions. For the best practices of computing VMAF at the right resolution, refer to our [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12). Below is an example on how you can compare a downscaled video with its original 1080p source: ``` -ffmpeg -i main.mpg -i ref.mpg -filter_complex "[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null - +ffmpeg -i main.mpg -i ref.mpg -filter_complex \ +"[0:v]scale=1920x1080:flags=bicubic[main];[main][1:v]libvmaf" -f null - ``` Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. 
The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods). From fc6b9263c418a53ddc16eef0574d9f20f5c00b39 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 13:52:56 -0800 Subject: [PATCH 14/29] Remove adm related non-optimized code. --- feature/src/adm.c | 234 +--------------------------- feature/src/adm_tools.c | 333 ---------------------------------------- feature/src/adm_tools.h | 10 +- 3 files changed, 3 insertions(+), 574 deletions(-) diff --git a/feature/src/adm.c b/feature/src/adm.c index 1123d710c..8e9160b35 100644 --- a/feature/src/adm.c +++ b/feature/src/adm.c @@ -39,10 +39,8 @@ typedef adm_dwt_band_t_s adm_dwt_band_t; #define adm_sum_cube adm_sum_cube_s #define offset_image offset_image_s -#if ADM_OPT_ENABLE - #define adm_csf_den_scale adm_csf_den_scale_s - #define dwt2_src_indices_filt dwt2_src_indices_filt_s -#endif +#define adm_csf_den_scale adm_csf_den_scale_s +#define dwt2_src_indices_filt dwt2_src_indices_filt_s static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one) { @@ -53,7 +51,6 @@ static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_o return data_top; } -#if ADM_OPT_ENABLE static char *init_dwt_band_hvd(adm_dwt_band_t *band, char *data_top, size_t buf_sz_one) { band->band_a = NULL; @@ -274,233 +271,6 @@ int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride aligned_free(buf_x_orig); return ret; } -#else // ADM_OPT_ENABLE -int compute_adm(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores, double border_factor) -{ -#ifdef ADM_OPT_SINGLE_PRECISION - double numden_limit 
= 1e-2 * (w * h) / (1920.0 * 1080.0); -#else - double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0); -#endif - float *data_buf = 0; - char *data_top; - - float *ref_scale; - float *dis_scale; - - adm_dwt_band_t ref_dwt2; - adm_dwt_band_t dis_dwt2; - - adm_dwt_band_t decouple_r; - adm_dwt_band_t decouple_a; - - adm_dwt_band_t csf_o; - adm_dwt_band_t csf_r; - adm_dwt_band_t csf_a; - - float *mta; - - adm_dwt_band_t cm_r; - - const float *curr_ref_scale = ref; - const float *curr_dis_scale = dis; - int curr_ref_stride = ref_stride; - int curr_dis_stride = dis_stride; - - int orig_h = h; - - int buf_stride = ALIGN_CEIL(((w + 1) / 2) * sizeof(float)); - size_t buf_sz_one = (size_t)buf_stride * ((h + 1) / 2); - - double num = 0; - double den = 0; - - int scale; - int ret = 1; - - if (SIZE_MAX / buf_sz_one < 35) - { - printf("error: SIZE_MAX / buf_sz_one < 35, buf_sz_one = %zu.\n", buf_sz_one); - fflush(stdout); - goto fail; - } - - if (!(data_buf = aligned_malloc(buf_sz_one * 35, MAX_ALIGN))) - { - printf("error: aligned_malloc failed for data_buf.\n"); - fflush(stdout); - goto fail; - } - - data_top = (char *)data_buf; - - ref_scale = (float *)data_top; data_top += buf_sz_one; - dis_scale = (float *)data_top; data_top += buf_sz_one; - - data_top = init_dwt_band(&ref_dwt2, data_top, buf_sz_one); - data_top = init_dwt_band(&dis_dwt2, data_top, buf_sz_one); - data_top = init_dwt_band(&decouple_r, data_top, buf_sz_one); - data_top = init_dwt_band(&decouple_a, data_top, buf_sz_one); - data_top = init_dwt_band(&csf_o, data_top, buf_sz_one); - data_top = init_dwt_band(&csf_r, data_top, buf_sz_one); - data_top = init_dwt_band(&csf_a, data_top, buf_sz_one); - - mta = (float *)data_top; data_top += buf_sz_one; - - data_top = init_dwt_band(&cm_r, data_top, buf_sz_one); - - for (scale = 0; scale < 4; ++scale) { -#ifdef ADM_OPT_DEBUG_DUMP - char pathbuf[256]; -#endif - float num_scale = 0.0; - float den_scale = 0.0; - - adm_dwt2(curr_ref_scale, &ref_dwt2, w, h, curr_ref_stride, 
buf_stride); - adm_dwt2(curr_dis_scale, &dis_dwt2, w, h, curr_dis_stride, buf_stride); - - w = (w + 1) / 2; - h = (h + 1) / 2; - - adm_decouple(&ref_dwt2, &dis_dwt2, &decouple_r, &decouple_a, w, h, buf_stride, buf_stride, buf_stride, buf_stride); - - adm_csf(&ref_dwt2, &csf_o, orig_h, scale, w, h, buf_stride, buf_stride); - adm_csf(&decouple_r, &csf_r, orig_h, scale, w, h, buf_stride, buf_stride); - adm_csf(&decouple_a, &csf_a, orig_h, scale, w, h, buf_stride, buf_stride); - - adm_cm_thresh(&csf_a, mta, w, h, buf_stride, buf_stride); - adm_cm(&csf_r, &cm_r, mta, w, h, buf_stride, buf_stride, buf_stride); - -#ifdef ADM_OPT_DEBUG_DUMP - sprintf(pathbuf, "stage/ref[%d]_a.yuv", scale); - write_image(pathbuf, ref_dwt2.band_a, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/ref[%d]_h.yuv", scale); - write_image(pathbuf, ref_dwt2.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/ref[%d]_v.yuv", scale); - write_image(pathbuf, ref_dwt2.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/ref[%d]_d.yuv", scale); - write_image(pathbuf, ref_dwt2.band_d, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/dis[%d]_a.yuv", scale); - write_image(pathbuf, dis_dwt2.band_a, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/dis[%d]_h.yuv", scale); - write_image(pathbuf, dis_dwt2.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/dis[%d]_v.yuv", scale); - write_image(pathbuf, dis_dwt2.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/dis[%d]_d.yuv", scale); - write_image(pathbuf, dis_dwt2.band_d, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/r[%d]_h.yuv", scale); - write_image(pathbuf, decouple_r.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/r[%d]_v.yuv", scale); - write_image(pathbuf, decouple_r.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/r[%d]_d.yuv", scale); - write_image(pathbuf, decouple_r.band_d, w, h, 
buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/a[%d]_h.yuv", scale); - write_image(pathbuf, decouple_a.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/a[%d]_v.yuv", scale); - write_image(pathbuf, decouple_a.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/a[%d]_d.yuv", scale); - write_image(pathbuf, decouple_a.band_d, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_o[%d]_h.yuv", scale); - write_image(pathbuf, csf_o.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_o[%d]_v.yuv", scale); - write_image(pathbuf, csf_o.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_o[%d]_d.yuv", scale); - write_image(pathbuf, csf_o.band_d, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_r[%d]_h.yuv", scale); - write_image(pathbuf, csf_r.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_r[%d]_v.yuv", scale); - write_image(pathbuf, csf_r.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_r[%d]_d.yuv", scale); - write_image(pathbuf, csf_r.band_d, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_a[%d]_h.yuv", scale); - write_image(pathbuf, csf_a.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_a[%d]_v.yuv", scale); - write_image(pathbuf, csf_a.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/csf_a[%d]_d.yuv", scale); - write_image(pathbuf, csf_a.band_d, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/mta[%d].yuv", scale); - write_image(pathbuf, mta, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/cm_r[%d]_h.yuv", scale); - write_image(pathbuf, cm_r.band_h, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/cm_r[%d]_v.yuv", scale); - write_image(pathbuf, cm_r.band_v, w, h, buf_stride, sizeof(float)); - - sprintf(pathbuf, "stage/cm_r[%d]_d.yuv", scale); - write_image(pathbuf, cm_r.band_d, w, h, buf_stride, 
sizeof(float)); -#endif - num_scale += adm_sum_cube(cm_r.band_h, w, h, buf_stride, border_factor); - num_scale += adm_sum_cube(cm_r.band_v, w, h, buf_stride, border_factor); - num_scale += adm_sum_cube(cm_r.band_d, w, h, buf_stride, border_factor); - - den_scale += adm_sum_cube(csf_o.band_h, w, h, buf_stride, border_factor); - den_scale += adm_sum_cube(csf_o.band_v, w, h, buf_stride, border_factor); - den_scale += adm_sum_cube(csf_o.band_d, w, h, buf_stride, border_factor); - - num += num_scale; - den += den_scale; - - /* Copy DWT2 approximation band to buffer for next scale. */ - adm_buffer_copy(ref_dwt2.band_a, ref_scale, w * sizeof(float), h, buf_stride, buf_stride); - adm_buffer_copy(dis_dwt2.band_a, dis_scale, w * sizeof(float), h, buf_stride, buf_stride); - - curr_ref_scale = ref_scale; - curr_dis_scale = dis_scale; - curr_ref_stride = buf_stride; - curr_dis_stride = buf_stride; -#ifdef ADM_OPT_DEBUG_DUMP - printf("num: %f\n", num); - printf("den: %f\n", den); -#endif - scores[2*scale+0] = num_scale; - scores[2*scale+1] = den_scale; - } - - num = num < numden_limit ? 0 : num; - den = den < numden_limit ? 
0 : den; - - if (den == 0.0) - { - *score = 1.0f; - } - else - { - *score = num / den; - } - *score_num = num; - *score_den = den; - - ret = 0; - -fail: - aligned_free(data_buf); - return ret; -} -#endif int adm(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, int w, int h, const char *fmt) { diff --git a/feature/src/adm_tools.c b/feature/src/adm_tools.c index 5cb57aecd..d5578c706 100644 --- a/feature/src/adm_tools.c +++ b/feature/src/adm_tools.c @@ -48,14 +48,12 @@ static float rcp_s(float x) static const float dwt2_db2_coeffs_lo_s[4] = { 0.482962913144690, 0.836516303737469, 0.224143868041857, -0.129409522550921 }; static const float dwt2_db2_coeffs_hi_s[4] = { -0.129409522550921, -0.224143868041857, 0.836516303737469, -0.482962913144690 }; -#if ADM_OPT_ENABLE #ifndef FLOAT_ONE_BY_30 #define FLOAT_ONE_BY_30 0.0333333351 #endif #ifndef FLOAT_ONE_BY_15 #define FLOAT_ONE_BY_15 0.0666666701 -#endif static const float fcoeff_cm_thresh_s[3][3] = { @@ -93,7 +91,6 @@ float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_fac return powf(accum, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f); } -#if ADM_OPT_ENABLE void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor) { #ifdef ADM_OPT_AVOID_ATAN @@ -210,107 +207,7 @@ void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, co } } } -#else -void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride) -{ -#ifdef ADM_OPT_AVOID_ATAN - const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0); -#endif - const float eps = 1e-30; - - int ref_px_stride = ref_stride 
/ sizeof(float); - int dis_px_stride = dis_stride / sizeof(float); - int r_px_stride = r_stride / sizeof(float); - int a_px_stride = a_stride / sizeof(float); - - float oh, ov, od, th, tv, td; - float kh, kv, kd, tmph, tmpv, tmpd; -#ifdef ADM_OPT_AVOID_ATAN - float ot_dp, o_mag_sq, t_mag_sq; -#else - float oa, ta, diff; -#endif - int angle_flag; - int i, j; - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - oh = ref->band_h[i * ref_px_stride + j]; - ov = ref->band_v[i * ref_px_stride + j]; - od = ref->band_d[i * ref_px_stride + j]; - th = dis->band_h[i * dis_px_stride + j]; - tv = dis->band_v[i * dis_px_stride + j]; - td = dis->band_d[i * dis_px_stride + j]; - - kh = DIVS(th, oh + eps); - kv = DIVS(tv, ov + eps); - kd = DIVS(td, od + eps); - - kh = kh < 0.0f ? 0.0f : (kh > 1.0f ? 1.0f : kh); - kv = kv < 0.0f ? 0.0f : (kv > 1.0f ? 1.0f : kv); - kd = kd < 0.0f ? 0.0f : (kd > 1.0f ? 1.0f : kd); - - tmph = kh * oh; - tmpv = kv * ov; - tmpd = kd * od; -#ifdef ADM_OPT_AVOID_ATAN - /* Determine if angle between (oh,ov) and (th,tv) is less than 1 degree. - * Given that u is the angle (oh,ov) and v is the angle (th,tv), this can - * be done by testing the inequvality. - * - * { (u.v.) 
>= 0 } AND { (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 } - * - * Proof: - * - * cos(theta) = (u.v) / (||u|| * ||v||) - * - * IF u.v >= 0 THEN - * cos(theta)^2 = (u.v)^2 / (||u||^2 * ||v||^2) - * (u.v)^2 = cos(theta)^2 * ||u||^2 * ||v||^2 - * - * IF |theta| < 1deg THEN - * (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 - * END - * ELSE - * |theta| > 90deg - * END - */ - ot_dp = oh * th + ov * tv; - o_mag_sq = oh * oh + ov * ov; - t_mag_sq = th * th + tv * tv; - - angle_flag = (ot_dp >= 0.0f) && (ot_dp * ot_dp >= cos_1deg_sq * o_mag_sq * t_mag_sq); -#else - oa = atanf(DIVS(ov, oh + eps)); - ta = atanf(DIVS(tv, th + eps)); - - if (oh < 0.0f) - oa += (float)M_PI; - if (th < 0.0f) - ta += (float)M_PI; - - diff = fabsf(oa - ta) * 180.0f / M_PI; - angle_flag = diff < 1.0f; -#endif - if (angle_flag) { - tmph = th; - tmpv = tv; - tmpd = td; - } - - r->band_h[i * r_px_stride + j] = tmph; - r->band_v[i * r_px_stride + j] = tmpv; - r->band_d[i * r_px_stride + j] = tmpd; - - a->band_h[i * a_px_stride + j] = th - tmph; - a->band_v[i * a_px_stride + j] = tv - tmpv; - a->band_d[i * a_px_stride + j] = td - tmpd; - } - } -} -#endif -#if ADM_OPT_ENABLE void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride, double border_factor) { const float *src_angles[3] = { src->band_h, src->band_v, src->band_d }; @@ -360,40 +257,7 @@ void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int ori } } } -#else -void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride) -{ - const float *src_angles[3] = { src->band_h, src->band_v, src->band_d }; - float *dst_angles[3] = { dst->band_h, dst->band_v, dst->band_d }; - - const float *src_ptr; - float *dst_ptr; - - int src_px_stride = src_stride / sizeof(float); - int dst_px_stride = dst_stride / sizeof(float); - - // for ADM: scales goes from 0 to 3 but in noise floor 
paper, it goes from - // 1 to 4 (from finest scale to coarsest scale). - float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1); - float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2); - float rfactor[3] = {1.0f / factor1, 1.0f / factor1, 1.0f / factor2}; - - int i, j, theta; - - for (theta = 0; theta < 3; ++theta) { - src_ptr = src_angles[theta]; - dst_ptr = dst_angles[theta]; - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - dst_ptr[i * dst_px_stride + j] = rfactor[theta] * src_ptr[i * src_px_stride + j]; - } - } - } -} -#endif -#if ADM_OPT_ENABLE /* Combination of adm_csf_s and adm_sum_cube_s for csf_o based den_scale */ float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, int w, int h, int src_stride, double border_factor) { @@ -454,9 +318,7 @@ float adm_csf_den_scale_s(const adm_dwt_band_t_s *src, int orig_h, int scale, in return(den_scale_h + den_scale_v + den_scale_d); } -#endif -#if ADM_OPT_ENABLE void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride) { const float *angles[3] = { src->band_h, src->band_v, src->band_d }; @@ -521,62 +383,7 @@ void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int } } -#else -void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride) -{ - const float *angles[3] = { src->band_h, src->band_v, src->band_d }; - const float *src_ptr; - - int src_px_stride = src_stride / sizeof(float); - int dst_px_stride = dst_stride / sizeof(float); - - float fcoeff, imgcoeff; - - int theta, i, j, fi, fj, ii, jj; - - for (i = 0; i < h; ++i) { - /* Zero output row. */ - for (j = 0; j < w; ++j) { - dst[i * dst_px_stride + j] = 0; - } - for (theta = 0; theta < 3; ++theta) { - src_ptr = angles[theta]; - - for (j = 0; j < w; ++j) { - float accum = 0; - - /* Mean of three convolutions by [1 1 1; 1 2 1; 1 1 1]. 
*/ - for (fi = 0; fi < 3; ++fi) { - for (fj = 0; fj < 3; ++fj) { - fcoeff = (fi == 1 && fj == 1) ? 1.0f / 15.0f : 1.0f / 30.0f; - - ii = i - 1 + fi; - jj = j - 1 + fj; - - /* Border handling by mirroring. */ - if (ii < 0) - ii = -ii; - else if (ii >= h) - ii = 2 * h - ii - 1; - if (jj < 0) - jj = -jj; - else if (jj >= w) - jj = 2 * w - jj - 1; - imgcoeff = fabsf(src_ptr[ii * src_px_stride + jj]); - - accum += fcoeff * imgcoeff; - } - } - - dst[i * dst_px_stride + j] += accum; - } - } - } -} -#endif - -#if ADM_OPT_ENABLE float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const adm_dwt_band_t_s *csf_a, int w, int h, int src_stride, int dst_stride, int csf_a_stride, double border_factor, int scale) { /* Take decouple_r as src and do dsf_s on decouple_r here to get csf_r */ @@ -818,41 +625,7 @@ float adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const a return (num_scale_h + num_scale_v + num_scale_d); } -#else -void adm_cm_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, const float *thresh, int w, int h, int src_stride, int dst_stride, int thresh_stride) -{ - int src_px_stride = src_stride / sizeof(float); - int dst_px_stride = dst_stride / sizeof(float); - int thresh_px_stride = thresh_stride / sizeof(float); - - float xh, xv, xd, thr; - - int i, j; - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - xh = src->band_h[i * src_px_stride + j]; - xv = src->band_v[i * src_px_stride + j]; - xd = src->band_d[i * src_px_stride + j]; - thr = thresh[i * thresh_px_stride + j]; - - xh = fabsf(xh) - thr; - xv = fabsf(xv) - thr; - xd = fabsf(xd) - thr; - - xh = xh < 0.0f ? 0.0f : xh; - xv = xv < 0.0f ? 0.0f : xv; - xd = xd < 0.0f ? 
0.0f : xd; - - dst->band_h[i * dst_px_stride + j] = xh; - dst->band_v[i * dst_px_stride + j] = xv; - dst->band_d[i * dst_px_stride + j] = xd; - } - } -} -#endif -#if ADM_OPT_ENABLE // This function stores the imgcoeff values used in adm_dwt2_s in buffers, which reduces the control code cycles. void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h) { @@ -902,9 +675,6 @@ void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h) } } -#endif - -#if ADM_OPT_ENABLE void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride) { const float *filter_lo = dwt2_db2_coeffs_lo_s; @@ -997,109 +767,6 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int aligned_free(tmplo); aligned_free(tmphi); } -#else -void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int src_stride, int dst_stride) -{ - const float *filter_lo = dwt2_db2_coeffs_lo_s; - const float *filter_hi = dwt2_db2_coeffs_hi_s; - int fwidth = sizeof(dwt2_db2_coeffs_lo_s) / sizeof(float); - - int src_px_stride = src_stride / sizeof(float); - int dst_px_stride = dst_stride / sizeof(float); - - float *tmplo = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN); - float *tmphi = aligned_malloc(ALIGN_CEIL(sizeof(float) * w), MAX_ALIGN); - float fcoeff_lo, fcoeff_hi, imgcoeff; - - int i, j, fi, fj, ii, jj; - - for (i = 0; i < (h + 1) / 2; ++i) { - /* Vertical pass. */ - for (j = 0; j < w; ++j) { - float accum_lo = 0; - float accum_hi = 0; - - for (fi = 0; fi < fwidth; ++fi) { - fcoeff_lo = filter_lo[fi]; - fcoeff_hi = filter_hi[fi]; - - /* Border handling by mirroring. 
*/ - ii = 2 * i - 1 + fi; - - if (ii < 0) - ii = -ii; - else if (ii >= h) - ii = 2 * h - ii - 1; - - imgcoeff = src[ii * src_px_stride + j]; - - accum_lo += fcoeff_lo * imgcoeff; - accum_hi += fcoeff_hi * imgcoeff; - } - - tmplo[j] = accum_lo; - tmphi[j] = accum_hi; - } - - /* Horizontal pass (lo). */ - for (j = 0; j < (w + 1) / 2; ++j) { - float accum_lo = 0; - float accum_hi = 0; - - for (fj = 0; fj < fwidth; ++fj) { - fcoeff_lo = filter_lo[fj]; - fcoeff_hi = filter_hi[fj]; - - /* Border handling by mirroring. */ - jj = 2 * j - 1 + fj; - - if (jj < 0) - jj = -jj; - else if (jj >= w) - jj = 2 * w - jj - 1; - - imgcoeff = tmplo[jj]; - - accum_lo += fcoeff_lo * imgcoeff; - accum_hi += fcoeff_hi * imgcoeff; - } - - dst->band_a[i * dst_px_stride + j] = accum_lo; - dst->band_v[i * dst_px_stride + j] = accum_hi; - } - - /* Horizontal pass (hi). */ - for (j = 0; j < (w + 1) / 2; ++j) { - float accum_lo = 0; - float accum_hi = 0; - - for (fj = 0; fj < fwidth; ++fj) { - fcoeff_lo = filter_lo[fj]; - fcoeff_hi = filter_hi[fj]; - - /* Border handling by mirroring. 
*/ - jj = 2 * j - 1 + fj; - - if (jj < 0) - jj = -jj; - else if (jj >= w) - jj = 2 * w - jj - 1; - - imgcoeff = tmphi[jj]; - - accum_lo += fcoeff_lo * imgcoeff; - accum_hi += fcoeff_hi * imgcoeff; - } - - dst->band_h[i * dst_px_stride + j] = accum_lo; - dst->band_d[i * dst_px_stride + j] = accum_hi; - } - } - - aligned_free(tmplo); - aligned_free(tmphi); -} -#endif void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride) { diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h index a9c93f591..c95b1c47c 100644 --- a/feature/src/adm_tools.h +++ b/feature/src/adm_tools.h @@ -25,10 +25,7 @@ #ifndef ADM_TOOLS_H_ #define ADM_TOOLS_H_ -#define ADM_OPT_ENABLE 1 - -#if ADM_OPT_ENABLE -// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 +// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 #define ADM_CM_THRESH_S_0_0(angles,src_px_stride,accum,w,h,i,j) \ { \ *accum = 0; \ @@ -125,7 +122,6 @@ } \ *accum = sum; \ } -#endif typedef struct adm_dwt_band_t_s { float *band_a; /* Low-pass V + low-pass H. */ @@ -134,8 +130,6 @@ typedef struct adm_dwt_band_t_s { float *band_d; /* High-pass V + high-pass H. */ } adm_dwt_band_t_s; -#if ADM_OPT_ENABLE - float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor); void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride, double border_factor); @@ -168,8 +162,6 @@ void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride); -#endif - /* ================= */ /* Noise floor model */ /* ================= */ From fb80c21392ff2d7d5544492bd26d8304ec597718 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 14:17:27 -0800 Subject: [PATCH 15/29] Remove buffer related non-optimized code. 
--- feature/src/common/blur_array.c | 41 --------- feature/src/common/blur_array.h | 14 --- wrapper/src/combo.c | 154 +------------------------------- wrapper/src/combo.h | 2 - wrapper/src/darray.c | 6 -- 5 files changed, 2 insertions(+), 215 deletions(-) diff --git a/feature/src/common/blur_array.c b/feature/src/common/blur_array.c index 0714e1a34..c4f53a224 100644 --- a/feature/src/common/blur_array.c +++ b/feature/src/common/blur_array.c @@ -21,9 +21,7 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a { arr->blur_buf_array[i].frame_idx = -1; arr->blur_buf_array[i].blur_buf = aligned_malloc(size, alignement); -#if BUF_OPT_ENABLE arr->blur_buf_array[i].reference_count = 0; -#endif if (arr->blur_buf_array[i].blur_buf == 0) return 0; @@ -41,7 +39,6 @@ int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t a */ float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) { -#if BUF_OPT_ENABLE int array_length = arr->actual_length; BLUR_BUF_STRUCT* s = arr->blur_buf_array; float *ret = NULL; @@ -66,31 +63,6 @@ float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) pthread_mutex_unlock(&arr->block); return ret; -#else - // find item for the search_frame_idx - while (1) - { - pthread_mutex_lock(&arr->block); - - int array_length = arr->actual_length; - BLUR_BUF_STRUCT* s = arr->blur_buf_array; - - for (int i = 0; i < array_length; i++) - { - if (s->frame_idx == search_frame_idx) - { - pthread_mutex_unlock(&arr->block); - return s->blur_buf; - } - - // next array item - s++; - } - - pthread_mutex_unlock(&arr->block); - } -#endif - return 0; } /* @@ -127,11 +99,7 @@ int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf) /* * resets the slot in the array to -1 to indicate that the buffer can be used again */ -#if BUF_OPT_ENABLE int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx) -#else -int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) -#endif { int ret = 0; 
int array_length = arr->actual_length; @@ -143,7 +111,6 @@ int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) { if (s->frame_idx == search_frame_idx) { -#if BUF_OPT_ENABLE if(s->reference_count <= 0) { s->frame_idx = -1; @@ -153,10 +120,6 @@ int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx) { ret = -1; } -#else - s->frame_idx = -1; - ret = 1; -#endif break; } @@ -189,7 +152,6 @@ void free_blur_buf(BLUR_BUF_ARRAY* arr) pthread_mutex_destroy(&arr->block); } -#if BUF_OPT_ENABLE /* * finds a free slot in the array, assigns the new frame index and returns the free buffer pointer * This increases the reference count for this slot @@ -278,6 +240,3 @@ int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx) return ret; } - - -#endif diff --git a/feature/src/common/blur_array.h b/feature/src/common/blur_array.h index 1209a590f..ddf38125b 100644 --- a/feature/src/common/blur_array.h +++ b/feature/src/common/blur_array.h @@ -12,12 +12,6 @@ #include "pthread.h" #include "alloc.h" -#ifdef MULTI_THREADING -#define BUF_OPT_ENABLE 1 -#else -#define BUF_OPT_ENABLE 0 -#endif - #define MAX_NUM_THREADS 128 typedef struct { @@ -39,8 +33,6 @@ typedef struct int init_blur_array(BLUR_BUF_ARRAY* arr, int array_length, size_t size, size_t alignement); -#if BUF_OPT_ENABLE - float* get_free_blur_buf_slot(BLUR_BUF_ARRAY* arr, int frame_idx); int get_blur_buf_reference_count(BLUR_BUF_ARRAY* arr, int frame_idx); @@ -49,12 +41,6 @@ int release_blur_buf_slot(BLUR_BUF_ARRAY* arr, int search_frame_idx); int release_blur_buf_reference(BLUR_BUF_ARRAY* arr, int search_frame_idx); -#else - -int release_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx); - -#endif - float* get_blur_buf(BLUR_BUF_ARRAY* arr, int search_frame_idx); int put_blur_buf(BLUR_BUF_ARRAY* arr, int frame_idx, float* blur_buf); diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c index c96c451b9..d12526b9f 100644 --- a/wrapper/src/combo.c +++ b/wrapper/src/combo.c @@ -99,9 +99,7 @@ void* 
combo_threadfunc(void* vmaf_thread_data) int ret = 0; bool next_frame_read; -#if BUF_OPT_ENABLE bool offset_flag; -#endif #ifdef MULTI_THREADING float *prev_blur_buf_ = 0; @@ -110,46 +108,6 @@ void* combo_threadfunc(void* vmaf_thread_data) float *blur_buf_ = 0; #endif -#if !BUF_OPT_ENABLE - if (!(ref_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for ref_buf.\n"); - goto fail_or_end; - } - if (!(next_ref_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for next_ref_buf.\n"); - goto fail_or_end; - } - - if (!(dis_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for dis_buf.\n"); - goto fail_or_end; - } - if (!(next_dis_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for next_dis_buf.\n"); - goto fail_or_end; - } - - if (!(prev_blur_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for prev_blur_buf.\n"); - goto fail_or_end; - } - if (!(blur_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for blur_buf.\n"); - goto fail_or_end; - } - if (!(next_blur_buf = aligned_malloc(data_sz, MAX_ALIGN))) - { - sprintf(errmsg, "aligned_malloc failed for next_blur_buf.\n"); - goto fail_or_end; - } -#endif - // use temp_buf for convolution_f32_c, and fread u and v if (!(temp_buf = aligned_malloc(data_sz * 2, MAX_ALIGN))) { @@ -179,7 +137,6 @@ void* combo_threadfunc(void* vmaf_thread_data) if (frm_idx == 0) { -#if BUF_OPT_ENABLE // Allocating the free buffers from buffer array blur_buf = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx); ref_buf = get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx); @@ -194,7 +151,6 @@ void* combo_threadfunc(void* vmaf_thread_data) #endif goto fail_or_end; } -#endif // read frame from file @@ -231,16 +187,12 @@ void* combo_threadfunc(void* vmaf_thread_data) convolution_f32_c(FILTER_5, 5, ref_buf, 
blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float)); #ifdef MULTI_THREADING -#if !BUF_OPT_ENABLE - put_blur_buf(&thread_data->blur_buf_array, frm_idx, blur_buf); -#endif #endif } #ifdef MULTI_THREADING else { -#if BUF_OPT_ENABLE // retrieve from buffer array ref_buf = get_blur_buf(&thread_data->ref_buf_array, frm_idx); dis_buf = get_blur_buf(&thread_data->dis_buf_array, frm_idx); @@ -255,25 +207,9 @@ void* combo_threadfunc(void* vmaf_thread_data) #endif goto fail_or_end; } -#else - // retrieve from buffer array - - ref_buf_ = get_blur_buf(&thread_data->ref_buf_array, frm_idx); - memcpy(ref_buf, ref_buf_, data_sz); - release_blur_buf(&thread_data->ref_buf_array, frm_idx); - - dis_buf_ = get_blur_buf(&thread_data->dis_buf_array, frm_idx); - memcpy(dis_buf, dis_buf_, data_sz); - release_blur_buf(&thread_data->dis_buf_array, frm_idx); - - blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx); - memcpy(blur_buf, blur_buf_, data_sz); - // don't releave blur_buf_array of frm_idx yet, since it will be used by the next frame again -#endif } #endif -#if BUF_OPT_ENABLE // Allocate free buffer from the buffer array for next frame index next_ref_buf = get_free_blur_buf_slot(&thread_data->ref_buf_array, frm_idx + 1); next_dis_buf = get_free_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1); @@ -286,7 +222,6 @@ void* combo_threadfunc(void* vmaf_thread_data) #endif goto fail_or_end; } -#endif ret = thread_data->read_frame(next_ref_buf, next_dis_buf, temp_buf, stride, user_data); if (ret == 1) @@ -309,15 +244,8 @@ void* combo_threadfunc(void* vmaf_thread_data) next_frame_read = true; } -#if !BUF_OPT_ENABLE -#ifdef MULTI_THREADING - pthread_mutex_unlock(&thread_data->mutex_readframe); -#endif -#endif - if (next_frame_read) { -#if BUF_OPT_ENABLE next_blur_buf = get_free_blur_buf_slot(&thread_data->blur_buf_array, frm_idx + 1); if(NULL == next_blur_buf) { @@ -327,7 +255,6 @@ void* combo_threadfunc(void* vmaf_thread_data) #endif goto fail_or_end; } 
-#endif // =============================================================== // offset pixel by OPT_RANGE_PIXEL_OFFSET // =============================================================== @@ -342,23 +269,13 @@ void* combo_threadfunc(void* vmaf_thread_data) // =============================================================== convolution_f32_c(FILTER_5, 5, next_ref_buf, next_blur_buf, temp_buf, w, h, stride / sizeof(float), stride / sizeof(float)); -#if !BUF_OPT_ENABLE -#ifdef MULTI_THREADING - // save next_ref_buf, next_ref_buf and next_ref_buf to buffer array - put_blur_buf(&thread_data->ref_buf_array, frm_idx + 1, next_ref_buf); - put_blur_buf(&thread_data->dis_buf_array, frm_idx + 1, next_dis_buf); - put_blur_buf(&thread_data->blur_buf_array, frm_idx + 1, next_blur_buf); -#endif -#endif } -#if BUF_OPT_ENABLE // release ref and dis buffer references after blur buf computation release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx + 1); release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx + 1); #ifdef MULTI_THREADING pthread_mutex_unlock(&thread_data->mutex_readframe); -#endif #endif dbg_printf("frame: %d, ", frm_idx); @@ -367,7 +284,6 @@ void* combo_threadfunc(void* vmaf_thread_data) // step they have been offset by OPT_RANGE_PIXEL_OFFSET, now // offset them back. 
// =============================================================== -#if BUF_OPT_ENABLE // offset back the buffers only if required if (frm_idx % n_subsample == 0 && ( (thread_data->psnr_array != NULL) || (thread_data->ssim_array != NULL) || (thread_data->ms_ssim_array != NULL) )) { @@ -375,10 +291,6 @@ void* combo_threadfunc(void* vmaf_thread_data) offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); offset_flag = true; } -#else - offset_image(ref_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); - offset_image(dis_buf, -OPT_RANGE_PIXEL_OFFSET, w, h, stride); -#endif if (frm_idx % n_subsample == 0 && thread_data->psnr_array != NULL) { /* =========== psnr ============== */ @@ -427,17 +339,12 @@ void* combo_threadfunc(void* vmaf_thread_data) // =============================================================== // for the rest, offset pixel by OPT_RANGE_PIXEL_OFFSET // =============================================================== -#if BUF_OPT_ENABLE if(offset_flag) { offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); offset_flag = false; } -#else - offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); - offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride); -#endif /* =========== adm ============== */ if (frm_idx % n_subsample == 0) @@ -519,7 +426,6 @@ void* combo_threadfunc(void* vmaf_thread_data) else { #ifdef MULTI_THREADING -#if BUF_OPT_ENABLE // avoid multiple memory copies prev_blur_buf = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); if(NULL == prev_blur_buf) @@ -528,10 +434,6 @@ void* combo_threadfunc(void* vmaf_thread_data) sprintf(errmsg, "Data not available for prev_blur_buf.\n"); goto fail_or_end; } -#else - prev_blur_buf_ = get_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); - memcpy(prev_blur_buf, prev_blur_buf_, data_sz); -#endif #endif if ((ret = compute_motion(prev_blur_buf, blur_buf, w, h, stride, stride, &score))) { @@ -539,11 +441,7 @@ void* 
combo_threadfunc(void* vmaf_thread_data) goto fail_or_end; } #ifdef MULTI_THREADING -#if BUF_OPT_ENABLE release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx - 1); -#else - release_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); -#endif #endif if (next_frame_read) @@ -559,9 +457,6 @@ void* combo_threadfunc(void* vmaf_thread_data) { score2 = score; #ifdef MULTI_THREADING -#if !BUF_OPT_ENABLE - release_blur_buf(&thread_data->blur_buf_array, frm_idx); // no more next frames, release this one too -#endif #endif } } @@ -573,27 +468,9 @@ void* combo_threadfunc(void* vmaf_thread_data) insert_array_at(thread_data->motion2_array, score2, frm_idx); } -#if !BUF_OPT_ENABLE - else - { -#ifdef MULTI_THREADING - if (frm_idx == 0) {} - else - { - release_blur_buf(&thread_data->blur_buf_array, frm_idx - 1); - if (next_frame_read) {} - else - { - release_blur_buf(&thread_data->blur_buf_array, frm_idx); // no more next frames, release this one too - } - } -#endif - } -#else /* Indicate that motion score computation for this frame is complete */ insert_array_at(thread_data->motion_score_compute_flag_array, 1.0, frm_idx); release_blur_buf_reference(&thread_data->blur_buf_array, frm_idx + 1); -#endif /* =========== vif ============== */ @@ -630,7 +507,6 @@ void* combo_threadfunc(void* vmaf_thread_data) dbg_printf("\n"); -#if BUF_OPT_ENABLE //Release references to reference and distorted buffers release_blur_buf_reference(&thread_data->ref_buf_array, frm_idx); release_blur_buf_reference(&thread_data->dis_buf_array, frm_idx); @@ -676,15 +552,6 @@ void* combo_threadfunc(void* vmaf_thread_data) release_blur_buf_slot(&thread_data->dis_buf_array, frm_idx + 1); release_blur_buf_slot(&thread_data->blur_buf_array, frm_idx); } -#else -#ifndef MULTI_THREADING - // copy to prev_buf - memcpy(prev_blur_buf, blur_buf, data_sz); - memcpy(ref_buf, next_ref_buf, data_sz); - memcpy(dis_buf, next_dis_buf, data_sz); - memcpy(blur_buf, next_blur_buf, data_sz); -#endif -#endif if 
(!next_frame_read) { @@ -698,15 +565,6 @@ void* combo_threadfunc(void* vmaf_thread_data) fail_or_end: -#if !BUF_OPT_ENABLE - aligned_free(ref_buf); - aligned_free(dis_buf); - aligned_free(prev_blur_buf); - aligned_free(next_ref_buf); - aligned_free(next_dis_buf); - aligned_free(next_blur_buf); - aligned_free(blur_buf); -#endif aligned_free(temp_buf); #ifdef MULTI_THREADING @@ -788,11 +646,9 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, combo_thread_data.stop_threads = 0; combo_thread_data.n_subsample = n_subsample; -#if BUF_OPT_ENABLE DArray motion_score_compute_flag_array; init_array(&motion_score_compute_flag_array, 1000); combo_thread_data.motion_score_compute_flag_array = &motion_score_compute_flag_array; -#endif // sanity check for width/height if (w <= 0 || h <= 0 || (size_t)w > ALIGN_FLOOR(INT_MAX) / sizeof(float)) @@ -827,7 +683,7 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, } // for motion analysis we compare to previous buffer and next buffer -#if BUF_OPT_ENABLE + /* * In the multi-thread mode, allocate a fixed size buffer pool for the reference, distorted and blur buffers. * At any point, the no. of required ref and dis buffers is 1 more than the total no. 
of allotted threads, @@ -838,11 +694,7 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN); init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count + 1, combo_thread_data.data_sz, MAX_ALIGN); init_blur_array(&combo_thread_data.blur_buf_array, 3 * (combo_thread_data.thread_count), combo_thread_data.data_sz, MAX_ALIGN); -#else - init_blur_array(&combo_thread_data.ref_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN); - init_blur_array(&combo_thread_data.dis_buf_array, combo_thread_data.thread_count, combo_thread_data.data_sz, MAX_ALIGN); - init_blur_array(&combo_thread_data.blur_buf_array, combo_thread_data.thread_count + 2, combo_thread_data.data_sz, MAX_ALIGN); -#endif + // initialize the mutex that protects the read_frame function pthread_mutex_init(&combo_thread_data.mutex_readframe, NULL); @@ -881,9 +733,7 @@ int combo(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, free_blur_buf(&combo_thread_data.dis_buf_array); free_blur_buf(&combo_thread_data.blur_buf_array); -#if BUF_OPT_ENABLE free_array(&motion_score_compute_flag_array); -#endif free(thread); diff --git a/wrapper/src/combo.h b/wrapper/src/combo.h index 1b335cc64..2c5473ec8 100644 --- a/wrapper/src/combo.h +++ b/wrapper/src/combo.h @@ -78,9 +78,7 @@ typedef struct BLUR_BUF_ARRAY blur_buf_array; BLUR_BUF_ARRAY ref_buf_array; BLUR_BUF_ARRAY dis_buf_array; -#if BUF_OPT_ENABLE DArray *motion_score_compute_flag_array; -#endif #endif int ret; diff --git a/wrapper/src/darray.c b/wrapper/src/darray.c index a0fcf0a1e..90447b93e 100644 --- a/wrapper/src/darray.c +++ b/wrapper/src/darray.c @@ -23,9 +23,7 @@ void init_array(DArray *a, size_t init_size) { a->array = (double *)malloc(init_size * sizeof(double)); -#if BUF_OPT_ENABLE memset(a->array, 0.0, init_size * sizeof(double)); -#endif 
a->used = 0; a->size = init_size; #ifdef MULTI_THREADING @@ -41,12 +39,10 @@ void insert_array(DArray *a, double e) if (a->used == a->size) { a->size *= 2; -#if BUF_OPT_ENABLE double *temp; temp = a->array; temp += (a->size / 2); memset(temp, 0.0, (a->size / 2) * sizeof(double)); -#endif a->array = (double *)realloc(a->array, a->size * sizeof(double)); } a->array[a->used++] = e; @@ -69,12 +65,10 @@ void insert_array_at(DArray *a, double e, int pos) { a->size *= 2; a->array = (double *)realloc(a->array, a->size * sizeof(double)); -#if BUF_OPT_ENABLE double *temp; temp = a->array; temp += (a->size / 2); memset(temp, 0.0, (a->size / 2) * sizeof(double)); -#endif } a->array[pos] = e; #ifdef MULTI_THREADING From 4370c12d56436343fa8dd804a9d78e76b3f577e2 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 14:31:21 -0800 Subject: [PATCH 16/29] Remove vif related non-optimized code. --- feature/src/common/convolution.h | 3 +- feature/src/common/convolution_avx.c | 5 +- feature/src/common/convolution_internal.h | 3 +- feature/src/vif.c | 77 +---------------------- feature/src/vif_options.h | 6 -- feature/src/vif_tools.c | 64 +------------------ feature/src/vif_tools.h | 2 - 7 files changed, 8 insertions(+), 152 deletions(-) diff --git a/feature/src/common/convolution.h b/feature/src/common/convolution.h index df7990018..1fb6344e0 100644 --- a/feature/src/common/convolution.h +++ b/feature/src/common/convolution.h @@ -37,9 +37,8 @@ void convolution_f32_c_s(const float *filter, int filter_width, const float *src void convolution_f32_avx_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride); -#if VIF_OPT_ENABLE void convolution_f32_avx_sq_s(const float *filter, int filter_width, const float *src, float *dst, float *tmp, int width, int height, int src_stride, int dst_stride); void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float *src1, const float *src2, float 
*dst, float *tmp, int width, int height, int src1_stride, int src2_stride, int dst_stride); -#endif + #endif // CONVOLUTION_H_ diff --git a/feature/src/common/convolution_avx.c b/feature/src/common/convolution_avx.c index 44d22c504..c11d4540d 100644 --- a/feature/src/common/convolution_avx.c +++ b/feature/src/common/convolution_avx.c @@ -29,7 +29,6 @@ FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_5(const float * RES FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); FORCE_INLINE inline void convolution_f32_avx_s_1d_v_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end); -#if VIF_OPT_ENABLE FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end); FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end); FORCE_INLINE inline void convolution_f32_avx_s_1d_h_sq_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end); @@ -43,7 +42,7 @@ FORCE_INLINE inline void convolution_f32_avx_s_1d_h_xy_scanline_17(const float * FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_5(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end); FORCE_INLINE inline void convolution_f32_avx_s_1d_v_xy_scanline_9(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end); FORCE_INLINE inline void 
convolution_f32_avx_s_1d_v_xy_scanline_17(const float * RESTRICT filter, int filter_width, const float * RESTRICT src1, const float * RESTRICT src2, float * RESTRICT dst, int src1_stride, int src2_stride, int j_end); -#endif + FORCE_INLINE inline static void convolution_f32_avx_s_3x3_2d_scanline(const float * RESTRICT filter, const float * RESTRICT src, float * RESTRICT dst, int src_stride, int j_end) { __m256 f00, f01, f02, f10, f11, f12, f20, f21, f22; @@ -873,7 +872,6 @@ void convolution_f32_avx_s(const float *filter, int filter_width, const float *s } } -#if VIF_OPT_ENABLE // Filter a single scanline. FORCE_INLINE inline static void convolution_f32_avx_s_1d_h_sq_scanline(int N, const float * RESTRICT filter, int filter_width, const float * RESTRICT src, float * RESTRICT dst, int j_end) { @@ -2647,4 +2645,3 @@ void convolution_f32_avx_xy_s(const float *filter, int filter_width, const float break; } } -#endif diff --git a/feature/src/common/convolution_internal.h b/feature/src/common/convolution_internal.h index 1bf5a3863..7dac2d21a 100644 --- a/feature/src/common/convolution_internal.h +++ b/feature/src/common/convolution_internal.h @@ -51,7 +51,6 @@ FORCE_INLINE inline float convolution_edge_s(bool horizontal, const float *filte return accum; } -#if VIF_OPT_ENABLE FORCE_INLINE inline float convolution_edge_sq_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j) { int radius = filter_width / 2; @@ -110,5 +109,5 @@ FORCE_INLINE inline float convolution_edge_xy_s(bool horizontal, const float *fi } return accum; } -#endif + #endif // CONVOLUTION_INTERNAL_H_ diff --git a/feature/src/vif.c b/feature/src/vif.c index b3f13169b..65faa1968 100644 --- a/feature/src/vif.c +++ b/feature/src/vif.c @@ -42,10 +42,9 @@ #define vif_statistic vif_statistic_s #define offset_image offset_image_s -#if VIF_OPT_ENABLE #define vif_filter1d_sq vif_filter1d_sq_s #define vif_filter1d_xy vif_filter1d_xy_s -#endif + int 
compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride, int dis_stride, double *score, double *score_num, double *score_den, double *scores) { float *data_buf = 0; @@ -64,17 +63,8 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride float *ref_dis_filt; float *tmpbuf; - -#if VIF_OPT_ENABLE - float *num_array; - float *den_array; -#else - float *mu1_sq; - float *mu2_sq; - float *mu1_mu2; float *num_array; float *den_array; -#endif /* Offset pointers to adjust for convolution border handling. */ float *mu1_adj = 0; @@ -106,8 +96,8 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride int scale; int ret = 1; -#if VIF_OPT_ENABLE - // Code optimized to save on multiple buffer copies + + // Code optimized to save on multiple buffer copies // hence the reduction in the number of buffers required from 15 to 10 #define VIF_BUF_CNT 10 if (SIZE_MAX / buf_sz_one < VIF_BUF_CNT) @@ -136,41 +126,6 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride num_array = (float *)data_top; data_top += buf_sz_one; den_array = (float *)data_top; data_top += buf_sz_one; tmpbuf = (float *)data_top; data_top += buf_sz_one; -#else - - if (SIZE_MAX / buf_sz_one < 15) - { - printf("error: SIZE_MAX / buf_sz_one < 15, buf_sz_one = %zu.\n", buf_sz_one); - fflush(stdout); - goto fail_or_end; - } - - if (!(data_buf = aligned_malloc(buf_sz_one * 16, MAX_ALIGN))) - { - printf("error: aligned_malloc failed for data_buf.\n"); - fflush(stdout); - goto fail_or_end; - } - - data_top = (char *)data_buf; - - ref_scale = (float *)data_top; data_top += buf_sz_one; - dis_scale = (float *)data_top; data_top += buf_sz_one; - ref_sq = (float *)data_top; data_top += buf_sz_one; - dis_sq = (float *)data_top; data_top += buf_sz_one; - ref_dis = (float *)data_top; data_top += buf_sz_one; - mu1 = (float *)data_top; data_top += buf_sz_one; - mu2 = (float *)data_top; data_top += buf_sz_one; - mu1_sq = (float 
*)data_top; data_top += buf_sz_one; - mu2_sq = (float *)data_top; data_top += buf_sz_one; - mu1_mu2 = (float *)data_top; data_top += buf_sz_one; - ref_sq_filt = (float *)data_top; data_top += buf_sz_one; - dis_sq_filt = (float *)data_top; data_top += buf_sz_one; - ref_dis_filt = (float *)data_top; data_top += buf_sz_one; - num_array = (float *)data_top; data_top += buf_sz_one; - den_array = (float *)data_top; data_top += buf_sz_one; - tmpbuf = (float *)data_top; data_top += buf_sz_one; -#endif for (scale = 0; scale < 4; ++scale) { @@ -237,36 +192,19 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride vif_filter2d(filter, curr_ref_scale, mu1, w, h, curr_ref_stride, buf_stride, filter_width); vif_filter2d(filter, curr_dis_scale, mu2, w, h, curr_dis_stride, buf_stride, filter_width); #endif -#if !VIF_OPT_ENABLE - vif_xx_yy_xy(mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride); - - vif_xx_yy_xy(curr_ref_scale, curr_dis_scale, ref_sq, dis_sq, ref_dis, w, h, curr_ref_stride, curr_dis_stride, buf_stride, buf_stride, buf_stride); -#endif #ifdef VIF_OPT_FILTER_1D -#if VIF_OPT_ENABLE - // Code optimized by adding intrinsic code for the functions, // vif_filter1d_sq and vif_filter1d_sq vif_filter1d_sq(filter, curr_ref_scale, ref_sq_filt, tmpbuf, w, h, curr_ref_stride, buf_stride, filter_width); vif_filter1d_sq(filter, curr_dis_scale, dis_sq_filt, tmpbuf, w, h, curr_dis_stride, buf_stride, filter_width); vif_filter1d_xy(filter, curr_ref_scale, curr_dis_scale, ref_dis_filt, tmpbuf, w, h, curr_ref_stride, curr_dis_stride, buf_stride, filter_width); -#else - vif_filter1d(filter, ref_sq, ref_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width); - vif_filter1d(filter, dis_sq, dis_sq_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width); - vif_filter1d(filter, ref_dis, ref_dis_filt, tmpbuf, w, h, buf_stride, buf_stride, filter_width); -#endif #else vif_filter2d(filter, ref_sq, ref_sq_filt, 
w, h, buf_stride, buf_stride, filter_width); vif_filter2d(filter, dis_sq, dis_sq_filt, w, h, buf_stride, buf_stride, filter_width); vif_filter2d(filter, ref_dis, ref_dis_filt, w, h, buf_stride, buf_stride, filter_width); #endif -#if VIF_OPT_ENABLE vif_statistic(mu1, mu2, NULL, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array, w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride); -#else - vif_statistic(mu1_sq, mu2_sq, mu1_mu2, ref_sq_filt, dis_sq_filt, ref_dis_filt, num_array, den_array, - w, h, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride, buf_stride); -#endif mu1_adj = ADJUST(mu1); mu2_adj = ADJUST(mu2); @@ -276,10 +214,6 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride ref_dis_filt_adj = ADJUST(ref_dis_filt); #endif -#if !VIF_OPT_ENABLE - num_array_adj = ADJUST(num_array); - den_array_adj = ADJUST(den_array); -#endif #undef ADJUST #ifdef VIF_OPT_DEBUG_DUMP @@ -311,13 +245,8 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride write_image(pathbuf, den_array_adj, buf_valid_w, buf_valid_h, buf_stride, sizeof(float)); #endif -#if VIF_OPT_ENABLE num = *num_array; den = *den_array; -#else - num = vif_sum(num_array_adj, buf_valid_w, buf_valid_h, buf_stride); - den = vif_sum(den_array_adj, buf_valid_w, buf_valid_h, buf_stride); -#endif scores[2*scale] = num; scores[2*scale+1] = den; diff --git a/feature/src/vif_options.h b/feature/src/vif_options.h index 61fe2ae49..ef1f93b2e 100644 --- a/feature/src/vif_options.h +++ b/feature/src/vif_options.h @@ -36,10 +36,4 @@ /* Whether to use a 1-D formulation of the Gaussian filter. 
*/ #define VIF_OPT_FILTER_1D -/* VIF optimizations are enabled only for ID filter */ -#ifdef VIF_OPT_FILTER_1D -#define VIF_OPT_ENABLE 1 -#else -#define VIF_OPT_ENABLE 0 -#endif #endif /* VIF_OPTIONS_H_ */ diff --git a/feature/src/vif_tools.c b/feature/src/vif_tools.c index 8f753fec5..6e66a3355 100644 --- a/feature/src/vif_tools.c +++ b/feature/src/vif_tools.c @@ -213,7 +213,6 @@ void vif_xx_yy_xy_s(const float *x, const float *y, float *xx, float *yy, float } } -#if VIF_OPT_ENABLE void vif_statistic_s(const float *mu1, const float *mu2, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den, int w, int h, int mu1_stride, int mu2_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride) { @@ -282,64 +281,6 @@ void vif_statistic_s(const float *mu1, const float *mu2, const float *mu1_mu2, c num[0] = accum_num; den[0] = accum_den; } -#else -void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_mu2, const float *xx_filt, const float *yy_filt, const float *xy_filt, float *num, float *den, - int w, int h, int mu1_sq_stride, int mu2_sq_stride, int mu1_mu2_stride, int xx_filt_stride, int yy_filt_stride, int xy_filt_stride, int num_stride, int den_stride) -{ - static const float sigma_nsq = 2; - static const float sigma_max_inv = 4.0/(255.0*255.0); - - int mu1_sq_px_stride = mu1_sq_stride / sizeof(float); - int mu2_sq_px_stride = mu2_sq_stride / sizeof(float); - int mu1_mu2_px_stride = mu1_mu2_stride / sizeof(float); - int xx_filt_px_stride = xx_filt_stride / sizeof(float); - int yy_filt_px_stride = yy_filt_stride / sizeof(float); - int xy_filt_px_stride = xy_filt_stride / sizeof(float); - int num_px_stride = num_stride / sizeof(float); - int den_px_stride = den_stride / sizeof(float); - - float mu1_sq_val, mu2_sq_val, mu1_mu2_val, xx_filt_val, yy_filt_val, xy_filt_val; - float sigma1_sq, sigma2_sq, sigma12, g, sv_sq; - float 
num_val, den_val; - int i, j; - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - mu1_sq_val = mu1_sq[i * mu1_sq_px_stride + j]; // same name as the Matlab code vifp_mscale.m - mu2_sq_val = mu2_sq[i * mu2_sq_px_stride + j]; - mu1_mu2_val = mu1_mu2[i * mu1_mu2_px_stride + j]; - xx_filt_val = xx_filt[i * xx_filt_px_stride + j]; - yy_filt_val = yy_filt[i * yy_filt_px_stride + j]; - xy_filt_val = xy_filt[i * xy_filt_px_stride + j]; - - sigma1_sq = xx_filt_val - mu1_sq_val; - sigma2_sq = yy_filt_val - mu2_sq_val; - sigma12 = xy_filt_val - mu1_mu2_val; - - if (sigma1_sq < sigma_nsq) { - num_val = 1.0 - sigma2_sq*sigma_max_inv; - den_val = 1.0; - } - else { - sv_sq = (sigma2_sq + sigma_nsq) * sigma1_sq; - if( sigma12 < 0 ) - { - num_val = 0.0; - } - else - { - g = sv_sq - sigma12 * sigma12; - num_val = log2f(sv_sq / g); - } - den_val = log2f(1.0f + sigma1_sq / sigma_nsq); - } - - num[i * num_px_stride + j] = num_val; - den[i * den_px_stride + j] = den_val; - } - } -} -#endif void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth) { @@ -402,8 +343,8 @@ void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, aligned_free(tmp); } -#if VIF_OPT_ENABLE -// Code optimized by adding intrinsic code for the functions, + +// Code optimized by adding intrinsic code for the functions, // vif_filter1d_sq and vif_filter1d_sq void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth) @@ -531,7 +472,6 @@ void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, flo aligned_free(tmp); } -#endif void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth) { diff --git a/feature/src/vif_tools.h b/feature/src/vif_tools.h index d2a1bd97d..459b934f9 100644 --- a/feature/src/vif_tools.h +++ b/feature/src/vif_tools.h @@ 
-43,11 +43,9 @@ void vif_statistic_s(const float *mu1_sq, const float *mu2_sq, const float *mu1_ void vif_filter1d_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth); -#if VIF_OPT_ENABLE void vif_filter1d_sq_s(const float *f, const float *src, float *dst, float *tmpbuf, int w, int h, int src_stride, int dst_stride, int fwidth); void vif_filter1d_xy_s(const float *f, const float *src1, const float *src2, float *dst, float *tmpbuf, int w, int h, int src1_stride, int src2_stride, int dst_stride, int fwidth); -#endif void vif_filter2d_s(const float *f, const float *src, float *dst, int w, int h, int src_stride, int dst_stride, int fwidth); From c1626ff2734c1db40c38ca6fa5adf63282512efb Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 14:48:26 -0800 Subject: [PATCH 17/29] Fix: Remove adm related non-optimized code. --- feature/src/adm_tools.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h index c95b1c47c..92d12d9b7 100644 --- a/feature/src/adm_tools.h +++ b/feature/src/adm_tools.h @@ -146,22 +146,6 @@ void dwt2_src_indices_filt_s(int **src_ind_y, int **src_ind_x, int w, int h); void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int **ind_y, int **ind_x, int w, int h, int src_stride, int dst_stride); -#else - -float adm_sum_cube_s(const float *x, int w, int h, int stride, double border_factor); - -void adm_decouple_s(const adm_dwt_band_t_s *ref, const adm_dwt_band_t_s *dis, const adm_dwt_band_t_s *r, const adm_dwt_band_t_s *a, int w, int h, int ref_stride, int dis_stride, int r_stride, int a_stride); - -void adm_csf_s(const adm_dwt_band_t_s *src, const adm_dwt_band_t_s *dst, int orig_h, int scale, int w, int h, int src_stride, int dst_stride); - -void adm_cm_thresh_s(const adm_dwt_band_t_s *src, float *dst, int w, int h, int src_stride, int dst_stride); - -void adm_cm_s(const adm_dwt_band_t_s *src, const 
adm_dwt_band_t_s *dst, const float *thresh, int w, int h, int src_stride, int dst_stride, int thresh_stride); - -void adm_dwt2_s(const float *src, const adm_dwt_band_t_s *dst, int w, int h, int src_stride, int dst_stride); - -void adm_buffer_copy(const void *src, void *dst, int linewidth, int h, int src_stride, int dst_stride); - /* ================= */ /* Noise floor model */ /* ================= */ From bf0bc4154cb770533fe4c0338adcd980370b435a Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 15:25:23 -0800 Subject: [PATCH 18/29] Update libvmaf.md --- resource/doc/libvmaf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md index 46c3cd82f..7a4e7edb5 100644 --- a/resource/doc/libvmaf.md +++ b/resource/doc/libvmaf.md @@ -41,7 +41,7 @@ To uninstall the library run: make uninstall ``` -### Use libvmaf with FFmpeg +### Use `libvmaf.a` with FFmpeg After installing `libvmaf.a`, you can use it with FFmpeg. Under FFmpeg directory, configure, build and install FFmpeg with: From a1cb186ff5930bf02406fa01f52e2f7e30b823df Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 15:52:16 -0800 Subject: [PATCH 19/29] Update version to 1.3.12; update CHANGELOG, README, VERSION, libvmaf.pc. --- CHANGELOG.md | 7 +++++++ README.md | 5 ++--- VERSION | 2 +- wrapper/libvmaf.pc | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f3ea5ead..9caef43a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Change Log +## (1/31/2019) [1.3.12] + +**New features:** +- Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version. +- Printed out individual vmaf bootstrap scores in text file from `vmafossexec`. 
+- refactored windows solution (#283) (#284) (#285) (#291) + ## (12/17/2018) [1.3.11] **New features:** diff --git a/README.md b/README.md index e9f7b7d4d..b7212081d 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,15 @@ VMAF - Video Multi-Method Assessment Fusion =================== [![Build Status](https://travis-ci.org/Netflix/vmaf.svg?branch=master)](https://travis-ci.org/Netflix/vmaf) -VMAF is a perceptual video quality assessment algorithm developed by Netflix. VMAF Development Kit (VDK) is a software package that contains the VMAF algorithm implementation, as well as a set of tools that allows a user to train and test a custom VMAF model. For an overview, read [this](http://techblog.netflix.com/2016/06/toward-practical-perceptual-video.html) tech blog post, or [this](resource/doc/VMAF_ICIP17.pdf) slide deck. +VMAF is a perceptual video quality assessment algorithm developed by Netflix. VMAF Development Kit (VDK) is a software package that contains the VMAF algorithm implementation, as well as a set of tools that allows a user to train and test a custom VMAF model. Read [this](https://medium.com/netflix-techblog/toward-a-practical-perceptual-video-quality-metric-653f208b9652) techblog post for an overview, or [this](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) post for the latest updates and tips for best practices. ## News +- (1/31/19) Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version. - (11/19/18) Added a BD-rate calculator implementation. See more details [here](resource/doc/VMAF_Python_library.md#bd-rate-calculator). - (10/25/18) We have published our [second techblog on VMAF](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12), with recommendations on best practices. - (9/13/18) [SUREAL](https://github.com/Netflix/sureal) is no longer a submodule to VMAF. 
- (6/19/18) Each VMAF prediction score now comes with a 95% [confidence interval (CI)](resource/doc/conf_interval.md), which quantifies the level of confidence that the prediction lies within the interval. -- (6/19/18) Added a [4K VMAF model](resource/doc/models.md/#predict-quality-on-a-4ktv-screen-at-15h) under `model/vmaf_4k_v0.6.1.pkl`, which predicts the subjective quality of video displayed on a 4KTV and viewed from the distance of 1.5X the display height. -- (6/5/18) Speed optimization to [`vmafossexec`](resource/doc/vmafossexec.md): 1) support multi-threading (e.g. use `--thread 0` to use all cores), 2) support frame sampling (e.g. use `--subsample 5` to calculate VMAF on one of every 5 frames). ## Frequently Asked Questions diff --git a/VERSION b/VERSION index 488dcd99b..1bd4e672b 100644 --- a/VERSION +++ b/VERSION @@ -1,2 +1,2 @@ -VMAF Development Kit (VDK) Version 1.3.11 +VMAF Development Kit (VDK) Version 1.3.12 VMAF Version 0.6.1 diff --git a/wrapper/libvmaf.pc b/wrapper/libvmaf.pc index 6dee1512c..e11b8d0fb 100644 --- a/wrapper/libvmaf.pc +++ b/wrapper/libvmaf.pc @@ -5,7 +5,7 @@ includedir=/usr/local/include Name: libvmaf Description: Netflix's VMAF library -Version: 1.3.11 +Version: 1.3.12 URL: https://github.com/Netflix/vmaf Requires: Requires.private: From 3772d0154fd4999a6c6c23ff7bbcb5ecea55e816 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 17:44:53 -0800 Subject: [PATCH 20/29] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9caef43a1..4ad285270 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ **New features:** - Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version. - Printed out individual vmaf bootstrap scores in text file from `vmafossexec`. 
-- refactored windows solution (#283) (#284) (#285) (#291) +- refactored windows solution (#283) (#284) (#285) (#291) (#298) ## (12/17/2018) [1.3.11] From fbb9d3ecda8cc2bd80ecbdd63f877825216045be Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 31 Jan 2019 17:50:08 -0800 Subject: [PATCH 21/29] Update version to 1.3.13. --- CHANGELOG.md | 4 ++-- VERSION | 2 +- wrapper/libvmaf.pc | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ad285270..a711deeb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,11 @@ # Change Log -## (1/31/2019) [1.3.12] +## (1/31/2019) [1.3.13] **New features:** - Optimized C code for speed. Running in multithreading mode, `vmafossexec` achieves ~40% run time reduction compared to the previous version. - Printed out individual vmaf bootstrap scores in text file from `vmafossexec`. -- refactored windows solution (#283) (#284) (#285) (#291) (#298) +- refactored windows solution (#283) (#284) (#285) (#291) (#298). 
## (12/17/2018) [1.3.11] diff --git a/VERSION b/VERSION index 1bd4e672b..3df5d2895 100644 --- a/VERSION +++ b/VERSION @@ -1,2 +1,2 @@ -VMAF Development Kit (VDK) Version 1.3.12 +VMAF Development Kit (VDK) Version 1.3.13 VMAF Version 0.6.1 diff --git a/wrapper/libvmaf.pc b/wrapper/libvmaf.pc index e11b8d0fb..e39035c80 100644 --- a/wrapper/libvmaf.pc +++ b/wrapper/libvmaf.pc @@ -5,7 +5,7 @@ includedir=/usr/local/include Name: libvmaf Description: Netflix's VMAF library -Version: 1.3.12 +Version: 1.3.13 URL: https://github.com/Netflix/vmaf Requires: Requires.private: From 4333cc2460562b99e10a05595aba8ecb07a874d8 Mon Sep 17 00:00:00 2001 From: Holy Wu Date: Sat, 2 Feb 2019 11:07:04 +0800 Subject: [PATCH 22/29] Update VMAFOSS_DOC_VERSION to 1.3.13 --- wrapper/src/vmaf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp index 5972d9c99..98ab6cc78 100644 --- a/wrapper/src/vmaf.cpp +++ b/wrapper/src/vmaf.cpp @@ -946,7 +946,7 @@ void BootstrapVmafQualityRunner::_set_prediction_result( } -static const char VMAFOSS_DOC_VERSION[] = "1.3.11"; +static const char VMAFOSS_DOC_VERSION[] = "1.3.13"; double RunVmaf(const char* fmt, int width, int height, int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), From f848a98073116f4e28531b60951df049b4ddc682 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Sat, 2 Feb 2019 15:00:59 -0800 Subject: [PATCH 23/29] Update year. 
--- CONTRIBUTING.md | 2 +- LICENSE | 2 +- feature/src/adm.c | 2 +- feature/src/adm_tools.h | 2 +- feature/src/all.c | 2 +- feature/src/ansnr_tools.h | 2 +- feature/src/common/frame.c | 2 +- feature/src/common/frame.h | 2 +- feature/src/moment.c | 2 +- feature/src/moment_main.c | 2 +- feature/src/moment_options.h | 2 +- feature/src/motion.c | 2 +- feature/src/motion_options.h | 2 +- feature/src/ms_ssim.c | 2 +- feature/src/psnr.c | 2 +- feature/src/psnr_main.c | 2 +- feature/src/psnr_tools.c | 2 +- feature/src/psnr_tools.h | 2 +- feature/src/vif_options.h | 2 +- feature/src/vif_tools.c | 2 +- feature/src/vif_tools.h | 2 +- python/script/ffmpeg2vmaf.py | 2 +- python/script/run_cleaning_cache.py | 2 +- python/script/run_psnr.py | 2 +- python/script/run_testing.py | 2 +- python/script/run_toddnoiseclassifier.py | 2 +- python/script/run_vmaf.py | 2 +- python/script/run_vmaf_cross_validation.py | 2 +- python/script/run_vmaf_in_batch.py | 2 +- python/script/run_vmaf_training.py | 2 +- python/src/vmaf/core/h5py_mixin.py | 2 +- python/test/asset_test.py | 2 +- python/test/command_line_test.py | 2 +- python/test/cross_validation_test.py | 2 +- python/test/extra/command_line_extratest.py | 2 +- python/test/feature_assembler_test.py | 2 +- python/test/feature_extractor_test.py | 2 +- python/test/local_explainer_test.py | 2 +- python/test/noref_feature_extractor_test.py | 2 +- python/test/perf_metric_test.py | 2 +- python/test/quality_runner_test.py | 2 +- python/test/raw_extractor_test.py | 2 +- python/test/reader_test.py | 2 +- python/test/result_test.py | 2 +- python/test/routine_test.py | 2 +- python/test/testutil.py | 2 +- python/test/train_test_model_test.py | 2 +- python/test/vmafossexec_test.py | 2 +- 48 files changed, 48 insertions(+), 48 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3f0c685f6..ad5cf954d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,7 @@ By contributing your code, you agree to license your contribution under the term 
``` /** - * Copyright 2016-2018 the original author or authors. + * Copyright 2016-2019 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/LICENSE b/LICENSE index aa6790299..fd51098ac 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2016-2018 Netflix, Inc. + Copyright 2016-2019 Netflix, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/feature/src/adm.c b/feature/src/adm.c index 8e9160b35..28ed2df1a 100644 --- a/feature/src/adm.c +++ b/feature/src/adm.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/adm_tools.h b/feature/src/adm_tools.h index 92d12d9b7..386447513 100644 --- a/feature/src/adm_tools.h +++ b/feature/src/adm_tools.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/all.c b/feature/src/all.c index 71d8c358e..9dece9325 100644 --- a/feature/src/all.c +++ b/feature/src/all.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/ansnr_tools.h b/feature/src/ansnr_tools.h index 32415b595..30cb52a59 100644 --- a/feature/src/ansnr_tools.h +++ b/feature/src/ansnr_tools.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/frame.c b/feature/src/common/frame.c index 1bdd87cec..fd5319430 100644 --- a/feature/src/common/frame.c +++ b/feature/src/common/frame.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/frame.h b/feature/src/common/frame.h index 33ec436a0..44e41bfd3 100644 --- a/feature/src/common/frame.h +++ b/feature/src/common/frame.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/moment.c b/feature/src/moment.c index 95268f9e8..63e31a649 100644 --- a/feature/src/moment.c +++ b/feature/src/moment.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/moment_main.c b/feature/src/moment_main.c index c201f21c6..e2d35e369 100644 --- a/feature/src/moment_main.c +++ b/feature/src/moment_main.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/moment_options.h b/feature/src/moment_options.h index 29d190d48..bc8a59dbb 100644 --- a/feature/src/moment_options.h +++ b/feature/src/moment_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/motion.c b/feature/src/motion.c index fa44d2def..17665ada8 100644 --- a/feature/src/motion.c +++ b/feature/src/motion.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/motion_options.h b/feature/src/motion_options.h index e17341d85..cf96903a0 100644 --- a/feature/src/motion_options.h +++ b/feature/src/motion_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/ms_ssim.c b/feature/src/ms_ssim.c index c47aa01e9..a8cdf33bf 100644 --- a/feature/src/ms_ssim.c +++ b/feature/src/ms_ssim.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/psnr.c b/feature/src/psnr.c index 6804dc1f6..81a229251 100644 --- a/feature/src/psnr.c +++ b/feature/src/psnr.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/psnr_main.c b/feature/src/psnr_main.c index 32ba86255..f036d2022 100644 --- a/feature/src/psnr_main.c +++ b/feature/src/psnr_main.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/psnr_tools.c b/feature/src/psnr_tools.c index 26934eac6..0c3825555 100644 --- a/feature/src/psnr_tools.c +++ b/feature/src/psnr_tools.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/psnr_tools.h b/feature/src/psnr_tools.h index 8f480cf82..a60dec78a 100644 --- a/feature/src/psnr_tools.h +++ b/feature/src/psnr_tools.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/vif_options.h b/feature/src/vif_options.h index ef1f93b2e..30bea189c 100644 --- a/feature/src/vif_options.h +++ b/feature/src/vif_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/vif_tools.c b/feature/src/vif_tools.c index 6e66a3355..5b6913ee8 100644 --- a/feature/src/vif_tools.c +++ b/feature/src/vif_tools.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/vif_tools.h b/feature/src/vif_tools.h index 459b934f9..61f882685 100644 --- a/feature/src/vif_tools.h +++ b/feature/src/vif_tools.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/python/script/ffmpeg2vmaf.py b/python/script/ffmpeg2vmaf.py index 43091dc9b..f302a80a5 100755 --- a/python/script/ffmpeg2vmaf.py +++ b/python/script/ffmpeg2vmaf.py @@ -15,7 +15,7 @@ cmd_option_exists from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le'] diff --git a/python/script/run_cleaning_cache.py b/python/script/run_cleaning_cache.py index cd4a5b66c..a20f5dd29 100755 --- a/python/script/run_cleaning_cache.py +++ b/python/script/run_cleaning_cache.py @@ -6,7 +6,7 @@ from vmaf.routine import run_remove_results_for_dataset from vmaf.tools.misc import import_python_file -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/script/run_psnr.py b/python/script/run_psnr.py index 1a5082cc0..d93f59f33 100755 --- a/python/script/run_psnr.py +++ b/python/script/run_psnr.py @@ -12,7 +12,7 @@ from vmaf.tools.misc import get_cmd_option from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." 
__license__ = "Apache, Version 2.0" FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le'] diff --git a/python/script/run_testing.py b/python/script/run_testing.py index 7ae2621dc..5e9e6fb9f 100755 --- a/python/script/run_testing.py +++ b/python/script/run_testing.py @@ -16,7 +16,7 @@ from vmaf.routine import run_test_on_dataset, print_matplotlib_warning from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" POOL_METHODS = ['mean', 'harmonic_mean', 'min', 'median', 'perc5', 'perc10', 'perc20'] diff --git a/python/script/run_toddnoiseclassifier.py b/python/script/run_toddnoiseclassifier.py index 23308e7b8..19fc0d0de 100644 --- a/python/script/run_toddnoiseclassifier.py +++ b/python/script/run_toddnoiseclassifier.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import os diff --git a/python/script/run_vmaf.py b/python/script/run_vmaf.py index 99d20c85a..75c116846 100755 --- a/python/script/run_vmaf.py +++ b/python/script/run_vmaf.py @@ -15,7 +15,7 @@ cmd_option_exists from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le'] diff --git a/python/script/run_vmaf_cross_validation.py b/python/script/run_vmaf_cross_validation.py index e261fba64..cbb51495d 100644 --- a/python/script/run_vmaf_cross_validation.py +++ b/python/script/run_vmaf_cross_validation.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." 
__license__ = "Apache, Version 2.0" import matplotlib.pyplot as plt diff --git a/python/script/run_vmaf_in_batch.py b/python/script/run_vmaf_in_batch.py index 803216b53..992ae0325 100755 --- a/python/script/run_vmaf_in_batch.py +++ b/python/script/run_vmaf_in_batch.py @@ -13,7 +13,7 @@ from vmaf.tools.misc import cmd_option_exists, get_cmd_option from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" FMTS = ['yuv420p', 'yuv422p', 'yuv444p', 'yuv420p10le', 'yuv422p10le', 'yuv444p10le'] diff --git a/python/script/run_vmaf_training.py b/python/script/run_vmaf_training.py index 320c15a24..651b54be8 100755 --- a/python/script/run_vmaf_training.py +++ b/python/script/run_vmaf_training.py @@ -14,7 +14,7 @@ from vmaf.routine import print_matplotlib_warning, train_test_vmaf_on_dataset from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" POOL_METHODS = ['mean', 'harmonic_mean', 'min', 'median', 'perc5', 'perc10', 'perc20'] diff --git a/python/src/vmaf/core/h5py_mixin.py b/python/src/vmaf/core/h5py_mixin.py index efa3f4619..659fee9bb 100644 --- a/python/src/vmaf/core/h5py_mixin.py +++ b/python/src/vmaf/core/h5py_mixin.py @@ -1,6 +1,6 @@ import h5py -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/test/asset_test.py b/python/test/asset_test.py index 6880cee33..11cadb0ee 100644 --- a/python/test/asset_test.py +++ b/python/test/asset_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." 
__license__ = "Apache, Version 2.0" import unittest diff --git a/python/test/command_line_test.py b/python/test/command_line_test.py index f3ba4667a..27a5639da 100644 --- a/python/test/command_line_test.py +++ b/python/test/command_line_test.py @@ -5,7 +5,7 @@ from vmaf.config import VmafConfig from vmaf.tools.misc import run_process -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" class CommandLineTest(unittest.TestCase): diff --git a/python/test/cross_validation_test.py b/python/test/cross_validation_test.py index 831d00834..2c2005a44 100644 --- a/python/test/cross_validation_test.py +++ b/python/test/cross_validation_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import unittest diff --git a/python/test/extra/command_line_extratest.py b/python/test/extra/command_line_extratest.py index 64fee4a49..362edbf3d 100644 --- a/python/test/extra/command_line_extratest.py +++ b/python/test/extra/command_line_extratest.py @@ -5,7 +5,7 @@ from vmaf import run_process from vmaf.config import VmafConfig -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" class CommandLineTest(unittest.TestCase): diff --git a/python/test/feature_assembler_test.py b/python/test/feature_assembler_test.py index 2c4e490d7..0c552291f 100644 --- a/python/test/feature_assembler_test.py +++ b/python/test/feature_assembler_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." 
__license__ = "Apache, Version 2.0" import unittest diff --git a/python/test/feature_extractor_test.py b/python/test/feature_extractor_test.py index 2a4ec6c65..921ad8696 100644 --- a/python/test/feature_extractor_test.py +++ b/python/test/feature_extractor_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import os diff --git a/python/test/local_explainer_test.py b/python/test/local_explainer_test.py index 7076f4ddb..abb305919 100644 --- a/python/test/local_explainer_test.py +++ b/python/test/local_explainer_test.py @@ -16,7 +16,7 @@ from vmaf.routine import read_dataset from vmaf.tools.misc import import_python_file -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/test/noref_feature_extractor_test.py b/python/test/noref_feature_extractor_test.py index c6ab47bcb..285017f12 100644 --- a/python/test/noref_feature_extractor_test.py +++ b/python/test/noref_feature_extractor_test.py @@ -1,6 +1,6 @@ from vmaf.core.executor import run_executors_in_parallel -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import unittest diff --git a/python/test/perf_metric_test.py b/python/test/perf_metric_test.py index ff88008d2..836cea6ee 100644 --- a/python/test/perf_metric_test.py +++ b/python/test/perf_metric_test.py @@ -7,7 +7,7 @@ from vmaf.core.perf_metric import RmsePerfMetric, SrccPerfMetric, PccPerfMetric, \ KendallPerfMetric, AucPerfMetric, ResolvingPowerPerfMetric -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." 
__license__ = "Apache, Version 2.0" class AggrScorePerfMetricTest(unittest.TestCase): diff --git a/python/test/quality_runner_test.py b/python/test/quality_runner_test.py index 9268da547..f42e79c91 100644 --- a/python/test/quality_runner_test.py +++ b/python/test/quality_runner_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import os diff --git a/python/test/raw_extractor_test.py b/python/test/raw_extractor_test.py index 82b857dc2..d9fa77e1d 100644 --- a/python/test/raw_extractor_test.py +++ b/python/test/raw_extractor_test.py @@ -1,6 +1,6 @@ from vmaf.core.executor import run_executors_in_parallel -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import unittest diff --git a/python/test/reader_test.py b/python/test/reader_test.py index c0ed3fba3..3510e0a6d 100644 --- a/python/test/reader_test.py +++ b/python/test/reader_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import unittest diff --git a/python/test/result_test.py b/python/test/result_test.py index 8fa97f9bf..7ca9ea5d7 100644 --- a/python/test/result_test.py +++ b/python/test/result_test.py @@ -1,6 +1,6 @@ from testutil import set_default_576_324_videos_for_testing -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." 
__license__ = "Apache, Version 2.0" import json diff --git a/python/test/routine_test.py b/python/test/routine_test.py index 111d1e055..fcf31a8df 100644 --- a/python/test/routine_test.py +++ b/python/test/routine_test.py @@ -8,7 +8,7 @@ from vmaf.core.quality_runner import VmafQualityRunner, BootstrapVmafQualityRunner from sureal.subjective_model import MosModel -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/test/testutil.py b/python/test/testutil.py index d665c02d2..980103f04 100644 --- a/python/test/testutil.py +++ b/python/test/testutil.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" from vmaf.config import VmafConfig diff --git a/python/test/train_test_model_test.py b/python/test/train_test_model_test.py index 9192bfeb4..39417d051 100644 --- a/python/test/train_test_model_test.py +++ b/python/test/train_test_model_test.py @@ -13,7 +13,7 @@ from vmaf.tools.misc import import_python_file from vmaf.core.raw_extractor import DisYUVRawVideoExtractor -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" class TrainTestModelTest(unittest.TestCase): diff --git a/python/test/vmafossexec_test.py b/python/test/vmafossexec_test.py index c06860c55..5d558df78 100644 --- a/python/test/vmafossexec_test.py +++ b/python/test/vmafossexec_test.py @@ -7,7 +7,7 @@ from testutil import set_default_576_324_videos_for_testing -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" From 7ec310955343a7ae4c6180cdb93b7eb51911cabf Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Sat, 2 Feb 2019 15:12:23 -0800 Subject: [PATCH 24/29] Update year. 
--- feature/src/adm_options.h | 2 +- feature/src/adm_tools.c | 2 +- feature/src/all_options.h | 2 +- feature/src/ansnr.c | 2 +- feature/src/ansnr_options.h | 2 +- feature/src/ansnr_tools.c | 2 +- feature/src/common/alignment.c | 2 +- feature/src/common/alignment.h | 2 +- feature/src/common/alloc.c | 2 +- feature/src/common/alloc.h | 2 +- feature/src/common/convolution.c | 2 +- feature/src/common/convolution.h | 2 +- feature/src/common/convolution_avx.c | 2 +- feature/src/common/convolution_internal.h | 2 +- feature/src/common/cpu.c | 2 +- feature/src/common/cpu.h | 2 +- feature/src/common/file_io.c | 2 +- feature/src/common/file_io.h | 2 +- feature/src/common/macros.h | 2 +- feature/src/iqa/iqa_options.h | 2 +- feature/src/motion_tools.h | 2 +- feature/src/ms_ssim_main.c | 2 +- feature/src/psnr_options.h | 2 +- feature/src/ssim.c | 2 +- feature/src/ssim_main.c | 2 +- feature/src/vif.c | 2 +- feature/src/vmaf_main.c | 2 +- python/script/run_result_assembly.py | 2 +- python/script/run_vmafossexec_subsampling.py | 2 +- python/src/vmaf/core/matlab_feature_extractor.py | 2 +- python/src/vmaf/core/matlab_quality_runner.py | 2 +- python/src/vmaf/core/niqe_train_test_model.py | 2 +- python/test/bootstrap_train_test_model_test.py | 2 +- python/test/executor_test.py | 2 +- python/test/extra/testutil.py | 2 +- python/test/niqe_train_test_model_test.py | 2 +- wrapper/src/combo.c | 2 +- wrapper/src/combo.h | 2 +- wrapper/src/darray.c | 2 +- wrapper/src/darray.h | 2 +- wrapper/src/debug.h | 2 +- wrapper/src/libvmaf.cpp | 2 +- wrapper/src/libvmaf.h | 2 +- wrapper/src/main.cpp | 2 +- wrapper/src/timer.h | 2 +- wrapper/src/vmaf.cpp | 2 +- wrapper/src/vmaf.h | 2 +- 47 files changed, 47 insertions(+), 47 deletions(-) diff --git a/feature/src/adm_options.h b/feature/src/adm_options.h index 72f8fa7c4..952add1f8 100644 --- a/feature/src/adm_options.h +++ b/feature/src/adm_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/adm_tools.c b/feature/src/adm_tools.c index d5578c706..d92428b36 100644 --- a/feature/src/adm_tools.c +++ b/feature/src/adm_tools.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/all_options.h b/feature/src/all_options.h index 947182ca0..11c25f3b2 100644 --- a/feature/src/all_options.h +++ b/feature/src/all_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/ansnr.c b/feature/src/ansnr.c index 9557da5cd..1befd7174 100644 --- a/feature/src/ansnr.c +++ b/feature/src/ansnr.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/ansnr_options.h b/feature/src/ansnr_options.h index af3b5c099..b2c0908ed 100644 --- a/feature/src/ansnr_options.h +++ b/feature/src/ansnr_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/ansnr_tools.c b/feature/src/ansnr_tools.c index 2bb150b2d..d74c310e3 100644 --- a/feature/src/ansnr_tools.c +++ b/feature/src/ansnr_tools.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/alignment.c b/feature/src/common/alignment.c index eade10ec8..4bb520585 100644 --- a/feature/src/common/alignment.c +++ b/feature/src/common/alignment.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/alignment.h b/feature/src/common/alignment.h index 7aa6dea81..f57da04e7 100644 --- a/feature/src/common/alignment.h +++ b/feature/src/common/alignment.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/alloc.c b/feature/src/common/alloc.c index 25879a111..e7e6754d7 100644 --- a/feature/src/common/alloc.c +++ b/feature/src/common/alloc.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/alloc.h b/feature/src/common/alloc.h index b4c07f3e9..228d87544 100644 --- a/feature/src/common/alloc.h +++ b/feature/src/common/alloc.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/common/convolution.c b/feature/src/common/convolution.c index dcea7b4ea..e93d1ff2a 100644 --- a/feature/src/common/convolution.c +++ b/feature/src/common/convolution.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/convolution.h b/feature/src/common/convolution.h index 1fb6344e0..d1840b0d2 100644 --- a/feature/src/common/convolution.h +++ b/feature/src/common/convolution.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/convolution_avx.c b/feature/src/common/convolution_avx.c index c11d4540d..a066de691 100644 --- a/feature/src/common/convolution_avx.c +++ b/feature/src/common/convolution_avx.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/convolution_internal.h b/feature/src/common/convolution_internal.h index 7dac2d21a..b8a9e2564 100644 --- a/feature/src/common/convolution_internal.h +++ b/feature/src/common/convolution_internal.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/cpu.c b/feature/src/common/cpu.c index 4befea251..85156f804 100644 --- a/feature/src/common/cpu.c +++ b/feature/src/common/cpu.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. 
+ * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/cpu.h b/feature/src/common/cpu.h index 9999bdc2a..1225bbb0b 100644 --- a/feature/src/common/cpu.h +++ b/feature/src/common/cpu.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/file_io.c b/feature/src/common/file_io.c index b51a4ba60..d8ebfbd1f 100644 --- a/feature/src/common/file_io.c +++ b/feature/src/common/file_io.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/file_io.h b/feature/src/common/file_io.h index 26ddcfbce..4ecbec900 100644 --- a/feature/src/common/file_io.h +++ b/feature/src/common/file_io.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/common/macros.h b/feature/src/common/macros.h index 565c693e2..989cb6cec 100644 --- a/feature/src/common/macros.h +++ b/feature/src/common/macros.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/iqa/iqa_options.h b/feature/src/iqa/iqa_options.h index 907612354..c1d609717 100644 --- a/feature/src/iqa/iqa_options.h +++ b/feature/src/iqa/iqa_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/motion_tools.h b/feature/src/motion_tools.h index cd8db5857..138bcb4b1 100644 --- a/feature/src/motion_tools.h +++ b/feature/src/motion_tools.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/ms_ssim_main.c b/feature/src/ms_ssim_main.c index 22a16ca73..02eac271a 100644 --- a/feature/src/ms_ssim_main.c +++ b/feature/src/ms_ssim_main.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/psnr_options.h b/feature/src/psnr_options.h index 8693bcd84..5b7b0f2e3 100644 --- a/feature/src/psnr_options.h +++ b/feature/src/psnr_options.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/ssim.c b/feature/src/ssim.c index 2db950ac5..e3e6d6d2a 100644 --- a/feature/src/ssim.c +++ b/feature/src/ssim.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/feature/src/ssim_main.c b/feature/src/ssim_main.c index 72a9b3869..a3fb4c143 100644 --- a/feature/src/ssim_main.c +++ b/feature/src/ssim_main.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/vif.c b/feature/src/vif.c index 65faa1968..91d844d60 100644 --- a/feature/src/vif.c +++ b/feature/src/vif.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/feature/src/vmaf_main.c b/feature/src/vmaf_main.c index 920cc9794..7a5489405 100644 --- a/feature/src/vmaf_main.c +++ b/feature/src/vmaf_main.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/python/script/run_result_assembly.py b/python/script/run_result_assembly.py index cbd33d767..015f76484 100644 --- a/python/script/run_result_assembly.py +++ b/python/script/run_result_assembly.py @@ -8,7 +8,7 @@ from vmaf.core.result import Result -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/script/run_vmafossexec_subsampling.py b/python/script/run_vmafossexec_subsampling.py index 36f48554b..ff6a2a3b9 100644 --- a/python/script/run_vmafossexec_subsampling.py +++ b/python/script/run_vmafossexec_subsampling.py @@ -11,7 +11,7 @@ from vmaf.tools.decorator import persist_to_dir from vmaf.tools.misc import import_python_file -__copyright__ = "Copyright 2016-2018, Netflix, Inc." 
+__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/src/vmaf/core/matlab_feature_extractor.py b/python/src/vmaf/core/matlab_feature_extractor.py index 480172e60..1160d7656 100644 --- a/python/src/vmaf/core/matlab_feature_extractor.py +++ b/python/src/vmaf/core/matlab_feature_extractor.py @@ -6,7 +6,7 @@ from vmaf.tools.misc import make_absolute_path, run_process from vmaf.tools.stats import ListStats -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/src/vmaf/core/matlab_quality_runner.py b/python/src/vmaf/core/matlab_quality_runner.py index 63acec42a..1902478a4 100644 --- a/python/src/vmaf/core/matlab_quality_runner.py +++ b/python/src/vmaf/core/matlab_quality_runner.py @@ -7,7 +7,7 @@ from vmaf.core.quality_runner import QualityRunner from vmaf.core.result import Result -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/src/vmaf/core/niqe_train_test_model.py b/python/src/vmaf/core/niqe_train_test_model.py index f766a56bb..45576537e 100644 --- a/python/src/vmaf/core/niqe_train_test_model.py +++ b/python/src/vmaf/core/niqe_train_test_model.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import numpy as np diff --git a/python/test/bootstrap_train_test_model_test.py b/python/test/bootstrap_train_test_model_test.py index 8e092af1e..9b76937d8 100644 --- a/python/test/bootstrap_train_test_model_test.py +++ b/python/test/bootstrap_train_test_model_test.py @@ -10,7 +10,7 @@ BootstrapSklearnRandomForestTrainTestModel, ResidueBootstrapLibsvmNusvrTrainTestModel, \ ResidueBootstrapRandomForestTrainTestModel -__copyright__ = "Copyright 2016-2018, Netflix, Inc." 
+__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" diff --git a/python/test/executor_test.py b/python/test/executor_test.py index 32f3181ae..fb36a4689 100644 --- a/python/test/executor_test.py +++ b/python/test/executor_test.py @@ -2,7 +2,7 @@ from vmaf.core.asset import Asset from vmaf.core.executor import Executor -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" class ExecutorTest(unittest.TestCase): diff --git a/python/test/extra/testutil.py b/python/test/extra/testutil.py index d665c02d2..980103f04 100644 --- a/python/test/extra/testutil.py +++ b/python/test/extra/testutil.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" from vmaf.config import VmafConfig diff --git a/python/test/niqe_train_test_model_test.py b/python/test/niqe_train_test_model_test.py index 5458591bc..f8841c0c6 100644 --- a/python/test/niqe_train_test_model_test.py +++ b/python/test/niqe_train_test_model_test.py @@ -1,4 +1,4 @@ -__copyright__ = "Copyright 2016-2018, Netflix, Inc." +__copyright__ = "Copyright 2016-2019, Netflix, Inc." __license__ = "Apache, Version 2.0" import unittest diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c index d12526b9f..a1d8c940b 100644 --- a/wrapper/src/combo.c +++ b/wrapper/src/combo.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/combo.h b/wrapper/src/combo.h index 2c5473ec8..8a54fc7c7 100644 --- a/wrapper/src/combo.h +++ b/wrapper/src/combo.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/darray.c b/wrapper/src/darray.c index 90447b93e..c0eadee8c 100644 --- a/wrapper/src/darray.c +++ b/wrapper/src/darray.c @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/darray.h b/wrapper/src/darray.h index ba8c1eae6..de161bc84 100644 --- a/wrapper/src/darray.h +++ b/wrapper/src/darray.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/debug.h b/wrapper/src/debug.h index 395a3617c..cdf8e4ba3 100644 --- a/wrapper/src/debug.h +++ b/wrapper/src/debug.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/libvmaf.cpp b/wrapper/src/libvmaf.cpp index e8e09de30..2f2572b59 100644 --- a/wrapper/src/libvmaf.cpp +++ b/wrapper/src/libvmaf.cpp @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/libvmaf.h b/wrapper/src/libvmaf.h index 71c82f69e..a3b4145b2 100644 --- a/wrapper/src/libvmaf.h +++ b/wrapper/src/libvmaf.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/main.cpp b/wrapper/src/main.cpp index 7c714d331..ead8b21c5 100644 --- a/wrapper/src/main.cpp +++ b/wrapper/src/main.cpp @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/timer.h b/wrapper/src/timer.h index 2f6756af1..d7d005933 100644 --- a/wrapper/src/timer.h +++ b/wrapper/src/timer.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp index 98ab6cc78..963433faf 100644 --- a/wrapper/src/vmaf.cpp +++ b/wrapper/src/vmaf.cpp @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/wrapper/src/vmaf.h b/wrapper/src/vmaf.h index 9a710052c..0bbf882ea 100644 --- a/wrapper/src/vmaf.h +++ b/wrapper/src/vmaf.h @@ -1,6 +1,6 @@ /** * - * Copyright 2016-2018 Netflix, Inc. + * Copyright 2016-2019 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 02e95ba523fbef0ada7006d0d47b9b50cd16a443 Mon Sep 17 00:00:00 2001 From: kjerbi Date: Thu, 7 Feb 2019 09:58:41 -0800 Subject: [PATCH 25/29] fix w10 error with using uninitialized offset_flag variable --- wrapper/src/combo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrapper/src/combo.c b/wrapper/src/combo.c index a1d8c940b..e13458672 100644 --- a/wrapper/src/combo.c +++ b/wrapper/src/combo.c @@ -99,7 +99,7 @@ void* combo_threadfunc(void* vmaf_thread_data) int ret = 0; bool next_frame_read; - bool offset_flag; + bool offset_flag = false; #ifdef MULTI_THREADING float *prev_blur_buf_ = 0; From 69593bf5d79240f51709f290010980ed120a7071 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Thu, 7 Feb 2019 21:41:58 -0800 Subject: [PATCH 26/29] Update libvmaf.md --- resource/doc/libvmaf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md index 7a4e7edb5..7f1dccab8 100644 --- a/resource/doc/libvmaf.md +++ b/resource/doc/libvmaf.md @@ -59,4 +59,4 @@ ffmpeg -i main.mpg -i ref.mpg -filter_complex \ Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods). -See the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms. 
+See the [libvmaf](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for FFmpeg's guide to libvmaf, the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms. From cd3f5bb9dba29047be05ea2e4e672f9f44f0fe48 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Fri, 8 Feb 2019 08:48:24 -0800 Subject: [PATCH 27/29] Update libvmaf.md --- resource/doc/libvmaf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource/doc/libvmaf.md b/resource/doc/libvmaf.md index 7f1dccab8..81abb89b9 100644 --- a/resource/doc/libvmaf.md +++ b/resource/doc/libvmaf.md @@ -59,4 +59,4 @@ ffmpeg -i main.mpg -i ref.mpg -filter_complex \ Here `main.mpg` is a downscaled and encoded video and `ref.mpg` is its reference source at 1080p. The command scales the first input video (`0:v`) and forwards it to VMAF (`libvmaf`) with the label `main`, where it is compared against the second input reference video, `1:v`. Bicubic upsampling is used (also see the [techblog](https://medium.com/netflix-techblog/vmaf-the-journey-continues-44b51ee9ed12) for the recommendation on upsampling methods). -See the [libvmaf](https://ffmpeg.org/ffmpeg-filters.html#libvmaf) section for FFmpeg's guide to libvmaf, the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms. +See the [FFmpeg's guide to libvmaf](https://ffmpeg.org/ffmpeg-filters.html#libvmaf), the [FFmpeg Filtering Guide](https://trac.ffmpeg.org/wiki/FilteringGuide) for more examples of complex filters, and the [Scaling Guide](https://trac.ffmpeg.org/wiki/Scaling) for information about scaling and using different scaling algorithms. 
From a538d7cf4524dbef321c04a845704152541eef3a Mon Sep 17 00:00:00 2001 From: Holy Wu Date: Tue, 12 Feb 2019 13:00:07 +0800 Subject: [PATCH 28/29] Report aggregate CI scores in vmafossexec Also write pool method to the xml log when it's specified. --- wrapper/src/vmaf.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp index 963433faf..9475c647e 100644 --- a/wrapper/src/vmaf.cpp +++ b/wrapper/src/vmaf.cpp @@ -1006,6 +1006,16 @@ double RunVmaf(const char* fmt, int width, int height, std::vector result_keys = result.get_keys(); + double aggregate_bagging = 0.0, aggregate_stddev = 0.0, aggregate_ci95_low = 0.0, aggregate_ci95_high = 0.0; + if (result.has_scores("bagging")) + aggregate_bagging = result.get_score("bagging"); + if (result.has_scores("stddev")) + aggregate_stddev = result.get_score("stddev"); + if (result.has_scores("ci95_low")) + aggregate_ci95_low = result.get_score("ci95_low"); + if (result.has_scores("ci95_high")) + aggregate_ci95_high = result.get_score("ci95_high"); + double aggregate_psnr = 0.0, aggregate_ssim = 0.0, aggregate_ms_ssim = 0.0; if (result.has_scores("psnr")) aggregate_psnr = result.get_score("psnr"); @@ -1017,6 +1027,14 @@ double RunVmaf(const char* fmt, int width, int height, if (pool_method) { printf("VMAF score (%s) = %f\n", pool_method, aggregate_vmaf); + if (aggregate_bagging) + printf("Bagging score (%s) = %f\n", pool_method, aggregate_bagging); + if (aggregate_stddev) + printf("StdDev score (%s) = %f\n", pool_method, aggregate_stddev); + if (aggregate_ci95_low) + printf("CI95_low score (%s) = %f\n", pool_method, aggregate_ci95_low); + if (aggregate_ci95_high) + printf("CI95_high score (%s) = %f\n", pool_method, aggregate_ci95_high); if (aggregate_psnr) printf("PSNR score (%s) = %f\n", pool_method, aggregate_psnr); if (aggregate_ssim) @@ -1027,6 +1045,14 @@ double RunVmaf(const char* fmt, int width, int height, else // default { printf("VMAF score 
= %f\n", aggregate_vmaf); + if (aggregate_bagging) + printf("Bagging score = %f\n", aggregate_bagging); + if (aggregate_stddev) + printf("StdDev score = %f\n", aggregate_stddev); + if (aggregate_ci95_low) + printf("CI95_low score = %f\n", aggregate_ci95_low); + if (aggregate_ci95_high) + printf("CI95_high score = %f\n", aggregate_ci95_high); if (aggregate_psnr) printf("PSNR score = %f\n", aggregate_psnr); if (aggregate_ssim) @@ -1130,12 +1156,22 @@ double RunVmaf(const char* fmt, int width, int height, auto info_node = xml_root.append_child("fyi"); info_node.append_attribute("numOfFrames") = (int)num_frames_subsampled; info_node.append_attribute("aggregateVMAF") = aggregate_vmaf; + if (aggregate_bagging) + info_node.append_attribute("aggregateBagging") = aggregate_bagging; + if (aggregate_stddev) + info_node.append_attribute("aggregateStdDev") = aggregate_stddev; + if (aggregate_ci95_low) + info_node.append_attribute("aggregateCI95_low") = aggregate_ci95_low; + if (aggregate_ci95_high) + info_node.append_attribute("aggregateCI95_high") = aggregate_ci95_high; if (aggregate_psnr) info_node.append_attribute("aggregatePSNR") = aggregate_psnr; if (aggregate_ssim) info_node.append_attribute("aggregateSSIM") = aggregate_ssim; if (aggregate_ms_ssim) info_node.append_attribute("aggregateMS_SSIM") = aggregate_ms_ssim; + if (pool_method) + info_node.append_attribute("poolMethod") = pool_method; info_node.append_attribute("execFps") = exec_fps; #if TIME_TEST_ENABLE info_node.append_attribute("timeTaken") = time_taken; From 85ddfed11c59bcf8e3863e33d811c59b37806ab0 Mon Sep 17 00:00:00 2001 From: Holy Wu Date: Tue, 12 Feb 2019 14:44:42 +0800 Subject: [PATCH 29/29] Fix empty model name in log on Windows Windows users may also use forward slashes besides back slashes in file path. Looking for only back slashes results in an empty string being returned when Windows users type forward slashes in the file path.
--- wrapper/src/vmaf.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/wrapper/src/vmaf.cpp b/wrapper/src/vmaf.cpp index 9475c647e..87650d185 100644 --- a/wrapper/src/vmaf.cpp +++ b/wrapper/src/vmaf.cpp @@ -46,7 +46,7 @@ template static inline T min(T x,T y) { return (x