From 20620092c71697918533e5d4720953f2125c52ef Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Thu, 25 May 2023 15:36:46 -0700 Subject: [PATCH 01/11] update readme --- README.md | 52 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 2ad558b1..a6353680 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,15 @@ More information about DirectML can be found in [Introduction to DirectML](https - [Hardware requirements](#hardware-requirements) - [For application developers](#for-application-developers) - [For users, data scientists, and researchers](#for-users-data-scientists-and-researchers) -- [DirectML Samples](#directml-samples) -- [DxDispatch Tool](#dxdispatch-tool) -- [Windows ML on DirectML](#windows-ml-on-directml) -- [ONNX Runtime on DirectML](#onnx-runtime-on-directml) -- [TensorFlow with DirectML](#tensorflow-with-directml) -- [PyTorch with DirectML](#pytorch-with-directml) + - [DirectML Samples](#directml-samples) +- [Support in Machine Learning Frameworks](#support-in-machine-learning-frameworks) + - [Windows ML on DirectML](#windows-ml-on-directml) + - [ONNX Runtime on DirectML](#onnx-runtime-on-directml) + - [TensorFlow with DirectML](#tensorflow-with-directml) + - [PyTorch with DirectML](#pytorch-with-directml) +- [Tools for DirectML](#tools-for-directml) + - [Olive (Model Optimization)](#olive-model-optimization) + - [DxDispatch (Testing/Benchmarking)](#dxdispatch-testingbenchmarking) - [Feedback](#feedback) - [External Links](#external-links) - [Documentation](#documentation) @@ -24,13 +27,13 @@ More information about DirectML can be found in [Introduction to DirectML](https Visit the [DirectX Landing Page](https://devblogs.microsoft.com/directx/landing-page/) for more resources for DirectX developers. -## Getting Started with DirectML +# Getting Started with DirectML DirectML is distributed as a system component of Windows 10, and is available as part of the Windows 10 operating system (OS) in Windows 10, version 1903 (10.0; Build 18362), and newer. Starting with DirectML [version 1.4.0](https://docs.microsoft.com/windows/win32/direct3d12/dml-version-history), DirectML is also available as a standalone redistributable package (see [Microsoft.AI.DirectML](https://www.nuget.org/packages/Microsoft.AI.DirectML/)), which is useful for applications that wish to use a fixed version of DirectML, or when running on older versions of Windows 10. -### Hardware requirements +## Hardware requirements DirectML requires a DirectX 12 capable device. Almost all commercially-available graphics cards released in the last several years support DirectX 12. Examples of compatible hardware include: @@ -39,7 +42,7 @@ DirectML requires a DirectX 12 capable device. Almost all commercially-available * NVIDIA Kepler (GTX 600 series) and above * Qualcomm Adreno 600 and above -### For application developers +## For application developers DirectML exposes a native C++ DirectX 12 API. The header and library (DirectML.h/DirectML.lib) are available as part of the [redistributable NuGet package](https://www.nuget.org/packages/Microsoft.AI.DirectML/), and are also included in the Windows 10 SDK version 10.0.18362 or newer. @@ -48,7 +51,7 @@ DirectML exposes a native C++ DirectX 12 API. 
The header and library (DirectML.h * [DirectML programming guide](https://docs.microsoft.com/windows/win32/direct3d12/dml) * [DirectML API reference](https://docs.microsoft.com/windows/win32/direct3d12/direct3d-directml-reference) -### For users, data scientists, and researchers +## For users, data scientists, and researchers DirectML is built-in as a backend to several frameworks such as Windows ML, ONNX Runtime, and TensorFlow. @@ -74,9 +77,7 @@ DirectML Python sample code is available under [Python/samples](./Python/samples * [FNS-Candy](./Python/samples/candy.py): Adapted from the [Windows ML Style Transfer model](https://github.com/microsoft/Windows-Machine-Learning/tree/master/Samples/FNSCandyStyleTransfer) sample, FNS-Candy re-applies specific artistic styles on regular images. * [Super Resolution](./Python/samples/superres.py): Adapted from the [ONNX Super Resolution model](https://github.com/onnx/models/tree/master/vision/super_resolution/sub_pixel_cnn_2016), Super-Res upscales and sharpens the input images to refine the details and improve image quality. -## DxDispatch Tool - -[DxDispatch](./DxDispatch/README.md) is simple command-line executable for launching DirectX 12 compute programs (including DirectML operators) without writing all the C++ boilerplate. +# Support in Machine Learning Frameworks ## Windows ML on DirectML @@ -127,7 +128,17 @@ PyTorch on DirectML is supported on both the latest versions of Windows 10 and t * [torch-directml PyPI project](https://pypi.org/project/torch-directml/) * [PyTorch homepage](https://pytorch.org/) -## Feedback +# Tools for DirectML + +## Olive (Model Optimization) + +[Olive](https://github.com/microsoft/olive) is a model optimization tool that composes industry-leading techniques across model compression, optimization, and compilation. Check out the [Olive and DirectML examples](https://github.com/microsoft/Olive/tree/main/examples/directml) for how you can optimize models to run their best on DirectML. + +## DxDispatch (Testing/Benchmarking) + +[DxDispatch](./DxDispatch/README.md) is simple command-line executable for launching DirectX 12 compute programs (including DirectML operators) without writing all the C++ boilerplate. It's a great tool for debugging and profiling in conjunction with [PIX on Windows](https://devblogs.microsoft.com/pix/introduction/). + +# Feedback We look forward to hearing from you! @@ -139,21 +150,24 @@ We look forward to hearing from you! * For ONNX Runtime issues, please file an issue at [microsoft/onnxruntime](https://github.com/microsoft/onnxruntime/issues). 
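The PyTorch hunk above links to the `torch-directml` package. As a minimal, hedged sketch of what that package enables (assuming `torch-directml` and a compatible PyTorch build are installed; the tensor sizes are arbitrary placeholders), a small computation can be placed on the default DirectML device like this:

```python
# Illustrative sketch: run a small op on the DirectML device exposed by torch-directml.
# Assumes the torch-directml package and a matching PyTorch build are installed.
import torch
import torch_directml

dml = torch_directml.device()   # default DirectX 12 adapter
a = torch.randn(2, 2).to(dml)   # placeholder tensors; sizes are arbitrary
b = torch.randn(2, 2).to(dml)
print(torch.matmul(a, b).to("cpu"))
```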
-## External Links +# External Links -### Documentation +## Documentation [DirectML programming guide](https://docs.microsoft.com/windows/win32/direct3d12/dml) [DirectML API reference](https://docs.microsoft.com/windows/win32/direct3d12/direct3d-directml-reference) -### More information +## More information [Introducing DirectML (Game Developers Conference '19)](https://www.youtube.com/watch?v=QjQm_wNrvVw) [Accelerating GPU Inferencing with DirectML and DirectX 12 (SIGGRAPH '18)](http://on-demand.gputechconf.com/siggraph/2018/video/sig1814-2-adrian-tsai-gpu-inferencing-directml-and-directx-12.html) [Windows AI: hardware-accelerated ML on Windows devices (Microsoft Build '20)](https://www.youtube.com/watch?v=-qf2PMuOXWI&feature=youtu.be) [Gaming with Windows ML (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/gaming-with-windows-ml/) [DirectML at GDC 2019 (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/directml-at-gdc-2019/) -[DirectX ❤ Linux (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/directx-heart-linux/) +[DirectX ❤ Linux (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/directx-heart-linux/) +[DirectML at Build 2023](https://devblogs.microsoft.com/directx/directml-at-build-2023/) +[Optimize DirectML performance with Olive](https://devblogs.microsoft.com/directx/optimize-directml-performance-with-olive/) +[DirectML ❤ Stable Diffusion](https://devblogs.microsoft.com/directx/dml-stable-diffusion/) -## Contributing +# Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us From 7e42552d7ba8716e42523a41ff57f5df31501492 Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Thu, 25 May 2023 15:40:51 -0700 Subject: [PATCH 02/11] update readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a6353680..09cc4a78 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,10 @@ DirectML C++ sample code is available under [Samples](./Samples). * [DirectMLSuperResolution](./Samples/DirectMLSuperResolution): A sample that uses DirectML to execute a basic super-resolution model to upscale video from 540p to 1080p in real time. * [yolov4](./Samples/yolov4): YOLOv4 is an object detection model capable of recognizing up to 80 different classes of objects in an image. This sample contains a complete end-to-end implementation of the model using DirectML, and is able to run in real time on a user-provided video stream. +The DirectML & Olive samples show how to optimize different types of models to run even better with DirectML: +* [Stable Diffusion](https://github.com/microsoft/Olive/tree/main/examples/directml/stable_diffusion): illustrates offline optimization of the Stable Diffusion architecture for greatly improved inference speed. +* [Dolly v2](https://github.com/microsoft/Olive/tree/main/examples/directml/dolly_v2): : illustrates offline optimization of the Dolly v2 model, a large language model that can be run on local hardware. + DirectML Python sample code is available under [Python/samples](./Python/samples). The samples require PyDirectML, an open source Python projection library for DirectML, which can be built and installed to a Python executing environment from [Python/src](./Python/src). Refer to the [Python/README.md](Python/README.md) file for more details. 
* [MobileNet](./Python/samples/mobilenet.py): Adapted from the [ONNX MobileNet model](https://github.com/onnx/models/tree/master/vision/classification/mobilenet). MobileNet classifies an image into 1000 different classes. It is highly efficient in speed and size, ideal for mobile applications. From 668a7a6c982e96fc18d5497d5d845fa7310fd58a Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Thu, 25 May 2023 15:40:59 -0700 Subject: [PATCH 03/11] remove outdated doc --- docs/CompVis_Stable_Diffusion_Instructions.md | 102 ------------------ docs/sd_conversion.png | Bin 41680 -> 0 bytes 2 files changed, 102 deletions(-) delete mode 100644 docs/CompVis_Stable_Diffusion_Instructions.md delete mode 100644 docs/sd_conversion.png diff --git a/docs/CompVis_Stable_Diffusion_Instructions.md b/docs/CompVis_Stable_Diffusion_Instructions.md deleted file mode 100644 index 4c21e698..00000000 --- a/docs/CompVis_Stable_Diffusion_Instructions.md +++ /dev/null @@ -1,102 +0,0 @@ -# Running CompVis Stable Diffusion on a Single GPU with ONNX Runtime and DirectML - -These instructions download and set up the CompVis Stable Diffusion v1.4 model through the Hugging Face diffusers and transformers library. It pulls relevant Python packages that allow the model to run on most discrete consumer graphics GPUs with ONNX Runtime atop the DirectML execution provider. These instructions are based on the prior work of [Neil McAlister](https://www.travelneil.com/stable-diffusion-windows-amd.html) with the more up-to-date script version from the Hugging Face Diffusers repo and its dependency packages, as well as additional conversion steps for better execution performance. - -## Overview - -The figure below provides an overview of the components involved in the model conversion process: - -![Stable Diffusion Conversion](sd_conversion.png) - -- [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion) : the original stable diffusion model source -- [Hugging Face](https://huggingface.co/) : provides APIs, packages, tools, and pretrained models for a wide variety of scenarios including stable diffusion - - (Python package) [diffusers](https://github.com/huggingface/diffusers) : offers APIs/scripts for diffusion models - - (Python package) [transformers](https://github.com/huggingface/transformers) : offers APIs/scripts for transformer models - - (Python script) [convert_stable_diffusion_checkpoint_to_onnx.py](https://github.com/HuggingFace/diffusers/blob/main/scripts/convert_stable_diffusion_checkpoint_to_onnx.py) : converts PyTorch implementation of stable diffusion to ONNX models -- (Python packages) PyTorch ([torch](https://pypi.org/project/torch/) and [torchvision](https://pypi.org/project/torchvision/), and [torchaudio](https://pypi.org/project/torchaudio/)) : the ML framework used for stable diffusion models -- (Python package) [onnx](https://pypi.org/project/onnx/) : for creating ONNX representations of ML models -- (Python package) [onnxruntime-directml](https://pypi.org/project/onnxruntime-directml/) : for running ONNX models with DirectML support - -## Installing Dependency Packages - -We need a few Python packages, namely the Hugging Face script libraries for transformers and diffusers along with ONNX Runtime for DirectML. - -``` -pip install diffusers transformers onnxruntime-directml onnx accelerate -``` - -You will also need PyTorch installed to run the Hugging Face model conversion script (`convert_stable_diffusion_checkpoint_to_onnx.py`). 
By default, the conversion script will output FP32 ONNX models. FP16 models consume less memory and may be faster depending on your GPU. However, **as of this writing, the Hugging Face stable diffusion to ONNX conversion script only supports FP16 if you have PyTorch with CUDA**. This will require up to 3 GB of additional disk space. - -**If you have a CUDA-capable graphics card**: -``` -pip install torch==1.13.1+cu116 torchaudio==0.13.1+cu116 torchvision==0.14.1 --index-url https://download.pytorch.org/whl/cu116 -``` - -**If you do not have a CUDA-capable graphics card**:: -``` -pip install torch==1.13.1 torchaudio==0.13.1 torchvision==0.14.1 -``` - -**⚠️ WARNING ⚠️** : Conversion to ONNX with PyTorch 2.0 does not currently work. If you encounter the following error, please make sure that you are using PyTorch 1.13.1 or older. See https://github.com/pytorch/pytorch/issues/96944. - - aten::scaled_dot_product_attention' to ONNX opset version 14 is not supported - -### Hardware Requirement -Since the entire model must fit within GPU memory while executing, the GPU should have at least 8GB of VRAM available to run this model. Here are a few examples: -- NVIDIA GeForce RTX 2070 or later -- AMD Radeon RX 6500 XT (8GB) or later -- Intel Arc A750 Graphics or later - -## Downloading the Model -We first need to download the model from [Hugging Face](https://huggingface.co/), for which you need an account. So if you haven't created one, now is the time. Once you've set up a Hugging Face account, generate an [access token](https://huggingface.co/docs/hub/security-tokens) (just follow their instructions in the web site). - -Once you have the account and an access token, authenticate yourself in a terminal or powershell console by running the following command. - -``` -huggingface-cli.exe login -``` - -It'll ask for your access token, which you can find on your account profile `Settings -> Access Tokens`, just copy it from here and carefully paste it on this prompt. Note that you won't see anything appear on the prompt when you paste it, that's fine. It's there already, just hit Enter. You'll start downloading the model from Hugging Face. - -## Converting to ONNX - -The model is trained with PyTorch so it can naturally convert to ONNX. Since we'll be using DirectML through ONNX Runtime, this step is needed. The script `convert_stable_diffusion_checkpoint_to_onnx.py`, which you will use here is just a local copy of the same file from the [Hugging Face diffusers GitHub repo](https://github.com/HuggingFace/diffusers/blob/main/scripts/convert_stable_diffusion_checkpoint_to_onnx.py). In case you don't want to clone that entire repo, just copy the file over. - -``` -python convert_stable_diffusion_checkpoint_to_onnx.py --model_path="CompVis/stable-diffusion-v1-4" --output_path="./stable_diffusion_onnx" --fp16 -``` - -This will run the conversion and put the result ONNX files under the `stable_diffusion_onnx` folder. For better performance, we recommend you convert the model to half-precision floating point data type using the `--fp16` option (as mentioned earlier, you must have PyTorch with CUDA support to use `--fp16`). - -## Running the ONNX Model - -You'll need a script that looks like what follows. On an NVIDIA GeForce RTX 2070, a single image currently takes about 20 seconds to generate from a prompt. It'll take between 5-10 mins on a CPU. 
```python
# (test/run.py)
from diffusers import OnnxStableDiffusionPipeline
pipe = OnnxStableDiffusionPipeline.from_pretrained("./stable_diffusion_onnx", provider="DmlExecutionProvider")
prompt = "a photo of an astronaut riding a horse on mars."
image = pipe(prompt).images[0]
image.save("./result.png")
```

### A Debugging Note
When running this script inside VSCode, the relative path specified here is relative to the base location of your project folder and not the location of your script file. To fix that up, configure the `cwd` (i.e. "current working directory") option in your launch.json file as follows:

```json
    // .vscode/launch.json
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "cwd": "${workspaceFolder}/test/",
            "console": "integratedTerminal",
            "justMyCode": true
        }
    ]
```

If you have an NVIDIA graphics card and want to try running the ONNX model on CUDA, just replace the `onnxruntime-directml` package with the `onnxruntime-gpu` package. Do not keep them both. Then replace the `"DmlExecutionProvider"` name in the running script `run.py` with `"CUDAExecutionProvider"`. You may need to install NVIDIA CUDA libraries separately.
diff --git a/docs/sd_conversion.png b/docs/sd_conversion.png
deleted file mode 100644
index 1cbbfcda447cab3229f4f9ac25e5292c599275fa..0000000000000000000000000000000000000000
GIT binary patch
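The removed instructions above end by noting that switching from DirectML to CUDA only requires swapping the installed ONNX Runtime package and the execution provider name. A minimal sketch of that swap, assuming only one of `onnxruntime-directml` or `onnxruntime-gpu` is installed and reusing the hypothetical `./stable_diffusion_onnx` output folder from those instructions:

```python
# Sketch of the provider swap described in the removed instructions (not a drop-in script).
import onnxruntime as ort
from diffusers import OnnxStableDiffusionPipeline

# With onnxruntime-directml installed this lists "DmlExecutionProvider";
# with onnxruntime-gpu it lists "CUDAExecutionProvider" instead.
print(ort.get_available_providers())

provider = "DmlExecutionProvider"  # or "CUDAExecutionProvider" when onnxruntime-gpu is installed
pipe = OnnxStableDiffusionPipeline.from_pretrained("./stable_diffusion_onnx", provider=provider)
image = pipe("a photo of an astronaut riding a horse on mars.").images[0]
image.save("./result.png")
```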
zIawY%q<)X8kxty|x9O+vus8g8$_ZDu;K+ET#e`Za(dFTusgCj_BSB)Ezub)m%vdE_ znObC#(?1e>&i4pD`9b;?b3%g_j?~o47#0Ame=0vY+V}Dq;J~Cj!N4~-1`cngm7rj=E%MuQ& zEhVmv5I?k5t;Qc-!$46csv(~Z483w|Y9kDq%*fjT-&ZrCUjhwEo;&;|!B0Wu zc4vV*VgSW736N=?-FBkBKJ^A8kCf0CMVa72^~j`<sU)?4Li1c7&xt}&O&6oIXh;#lT7Rz%WeZ9MotQ=OWrko(}^+C9cv&D;b5D3bFoeE z$P9=y$TL})163rwAxG?UiFyxr!=`P)ws#+}R&LgI0H3_FDGdKe{LxAD{w!fIloI|- z@@DV*lMaBSa3TOOPx-Y!o&!2qLm`b@7%i0PA@s`dXM%fJy)&smU}w8||0*)leVAC2T%zr$d`xlUD$!E6AwM qFU9~=hY@0=|2wtt?;x|L*Glx#%{kpz=exIrBidNmTNWO@9P Date: Thu, 25 May 2023 15:42:42 -0700 Subject: [PATCH 04/11] update dxd --- DxDispatch/CMakeLists.txt | 2 +- DxDispatch/README.md | 2 +- DxDispatch/cmake/onnxruntime.cmake | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DxDispatch/CMakeLists.txt b/DxDispatch/CMakeLists.txt index 500cdb7e..6fa0d930 100644 --- a/DxDispatch/CMakeLists.txt +++ b/DxDispatch/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18) -project(dxdispatch VERSION 0.15.1 LANGUAGES CXX) +project(dxdispatch VERSION 0.15.2 LANGUAGES CXX) # ============================================================================== # External Libraries/Helpers diff --git a/DxDispatch/README.md b/DxDispatch/README.md index 60b6a41c..7e311f13 100644 --- a/DxDispatch/README.md +++ b/DxDispatch/README.md @@ -94,7 +94,7 @@ The default redistributable versions of components (e.g. nuget, archives): - **Direct3D 12 (nuget)**: [Microsoft.Direct3D.D3D12 (1.610.2)](https://www.nuget.org/packages/Microsoft.Direct3D.D3D12/1.610.2) - 2023/04/20 - **DX Compiler (archive)**: [December 2022 (v1.7.2212.1)](https://github.com/microsoft/DirectXShaderCompiler/releases/tag/v1.7.2212.1) - 2023/03/02 - **PIX Event Runtime (nuget)**: [WinPixEventRuntime (1.0.230302001)](https://www.nuget.org/packages/WinPixEventRuntime/1.0.230302001) - 2023/03/02 -- **ONNX Runtime (nuget)**: [Microsoft.ML.OnnxRuntime.DirectML (1.14.1)](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.DirectML/1.14.1) - 2023/02/27 +- **ONNX Runtime (nuget)**: [Microsoft.ML.OnnxRuntime.DirectML (1.15.0)](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.DirectML/1.15.0) - 2023/05/24 Configuration is done using CMake cache variables. For example, Direct3D can be switched to a system dependency by adding `-DDXD_DIRECT3D_TYPE=winsdk` to the command line when first configuring the project. Use `cmake-gui` or `ccmake` to view the available variables. diff --git a/DxDispatch/cmake/onnxruntime.cmake b/DxDispatch/cmake/onnxruntime.cmake index f23257cc..933df0ff 100644 --- a/DxDispatch/cmake/onnxruntime.cmake +++ b/DxDispatch/cmake/onnxruntime.cmake @@ -52,13 +52,13 @@ function(init_onnxruntime_cache_variables prefix) # _ONNXRUNTIME_NUGET_VERSION set(${prefix}_ONNXRUNTIME_NUGET_VERSION - 1.14.1 + 1.15.0 CACHE STRING "Version of the ONNX Runtime NuGet package (TYPE == nuget)." ) # _ONNXRUNTIME_NUGET_HASH set(${prefix}_ONNXRUNTIME_NUGET_HASH - c8ae7623385b19cd5de968d0df5383e13b97d1b3a6771c9177eac15b56013a5a + C168D1C9C73E14041DF904E4B38F01A7F955AEF94AAFDEB4ED996F0656054062 CACHE STRING "SHA256 hash of the ONNX Runtime NuGet package (TYPE == nuget)." 
) From b26964f177a449642a0c163ccf539cee9dd3579f Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Thu, 25 May 2023 15:44:54 -0700 Subject: [PATCH 05/11] update cg --- DxDispatch/cgmanifest.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DxDispatch/cgmanifest.json b/DxDispatch/cgmanifest.json index 156e6c55..751d2835 100644 --- a/DxDispatch/cgmanifest.json +++ b/DxDispatch/cgmanifest.json @@ -15,7 +15,7 @@ "type": "nuget", "nuget": { "name": "Microsoft.Direct3D.D3D12", - "version": "1.608.2" + "version": "1.610.2" } } }, @@ -24,7 +24,7 @@ "type": "nuget", "nuget": { "name": "Microsoft.AI.DirectML", - "version": "1.10.1" + "version": "1.12.0" } } }, @@ -33,8 +33,8 @@ "type": "other", "other": { "name": "DirectX Shader Compiler", - "version": "2022_12_16", - "downloadUrl": "https://github.com/microsoft/DirectXShaderCompiler/releases/download/v1.7.2212/dxc_2022_12_16.zip" + "version": "2023_03_01", + "downloadUrl": "https://github.com/microsoft/DirectXShaderCompiler/releases/download/v1.7.2212.1/dxc_2023_03_01.zip" } } }, @@ -89,7 +89,7 @@ "type": "nuget", "nuget": { "name": "WinPixEventRuntime", - "version": "1.0.220124001" + "version": "1.0.230302001" } } }, @@ -116,7 +116,7 @@ "type": "nuget", "nuget": { "name": "Microsoft.ML.OnnxRuntime.DirectML", - "version": "1.14.1" + "version": "1.15.0" } } } From 7b9f1aaa02399e8a4c0712c1299804b91879d4e4 Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Fri, 26 May 2023 20:07:56 -0700 Subject: [PATCH 06/11] undo readme edits --- README.md | 56 +++++++++++++++++++------------------------------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 09cc4a78..2ad558b1 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,12 @@ More information about DirectML can be found in [Introduction to DirectML](https - [Hardware requirements](#hardware-requirements) - [For application developers](#for-application-developers) - [For users, data scientists, and researchers](#for-users-data-scientists-and-researchers) - - [DirectML Samples](#directml-samples) -- [Support in Machine Learning Frameworks](#support-in-machine-learning-frameworks) - - [Windows ML on DirectML](#windows-ml-on-directml) - - [ONNX Runtime on DirectML](#onnx-runtime-on-directml) - - [TensorFlow with DirectML](#tensorflow-with-directml) - - [PyTorch with DirectML](#pytorch-with-directml) -- [Tools for DirectML](#tools-for-directml) - - [Olive (Model Optimization)](#olive-model-optimization) - - [DxDispatch (Testing/Benchmarking)](#dxdispatch-testingbenchmarking) +- [DirectML Samples](#directml-samples) +- [DxDispatch Tool](#dxdispatch-tool) +- [Windows ML on DirectML](#windows-ml-on-directml) +- [ONNX Runtime on DirectML](#onnx-runtime-on-directml) +- [TensorFlow with DirectML](#tensorflow-with-directml) +- [PyTorch with DirectML](#pytorch-with-directml) - [Feedback](#feedback) - [External Links](#external-links) - [Documentation](#documentation) @@ -27,13 +24,13 @@ More information about DirectML can be found in [Introduction to DirectML](https Visit the [DirectX Landing Page](https://devblogs.microsoft.com/directx/landing-page/) for more resources for DirectX developers. -# Getting Started with DirectML +## Getting Started with DirectML DirectML is distributed as a system component of Windows 10, and is available as part of the Windows 10 operating system (OS) in Windows 10, version 1903 (10.0; Build 18362), and newer. 
Starting with DirectML [version 1.4.0](https://docs.microsoft.com/windows/win32/direct3d12/dml-version-history), DirectML is also available as a standalone redistributable package (see [Microsoft.AI.DirectML](https://www.nuget.org/packages/Microsoft.AI.DirectML/)), which is useful for applications that wish to use a fixed version of DirectML, or when running on older versions of Windows 10. -## Hardware requirements +### Hardware requirements DirectML requires a DirectX 12 capable device. Almost all commercially-available graphics cards released in the last several years support DirectX 12. Examples of compatible hardware include: @@ -42,7 +39,7 @@ DirectML requires a DirectX 12 capable device. Almost all commercially-available * NVIDIA Kepler (GTX 600 series) and above * Qualcomm Adreno 600 and above -## For application developers +### For application developers DirectML exposes a native C++ DirectX 12 API. The header and library (DirectML.h/DirectML.lib) are available as part of the [redistributable NuGet package](https://www.nuget.org/packages/Microsoft.AI.DirectML/), and are also included in the Windows 10 SDK version 10.0.18362 or newer. @@ -51,7 +48,7 @@ DirectML exposes a native C++ DirectX 12 API. The header and library (DirectML.h * [DirectML programming guide](https://docs.microsoft.com/windows/win32/direct3d12/dml) * [DirectML API reference](https://docs.microsoft.com/windows/win32/direct3d12/direct3d-directml-reference) -## For users, data scientists, and researchers +### For users, data scientists, and researchers DirectML is built-in as a backend to several frameworks such as Windows ML, ONNX Runtime, and TensorFlow. @@ -69,10 +66,6 @@ DirectML C++ sample code is available under [Samples](./Samples). * [DirectMLSuperResolution](./Samples/DirectMLSuperResolution): A sample that uses DirectML to execute a basic super-resolution model to upscale video from 540p to 1080p in real time. * [yolov4](./Samples/yolov4): YOLOv4 is an object detection model capable of recognizing up to 80 different classes of objects in an image. This sample contains a complete end-to-end implementation of the model using DirectML, and is able to run in real time on a user-provided video stream. -The DirectML & Olive samples show how to optimize different types of models to run even better with DirectML: -* [Stable Diffusion](https://github.com/microsoft/Olive/tree/main/examples/directml/stable_diffusion): illustrates offline optimization of the Stable Diffusion architecture for greatly improved inference speed. -* [Dolly v2](https://github.com/microsoft/Olive/tree/main/examples/directml/dolly_v2): : illustrates offline optimization of the Dolly v2 model, a large language model that can be run on local hardware. - DirectML Python sample code is available under [Python/samples](./Python/samples). The samples require PyDirectML, an open source Python projection library for DirectML, which can be built and installed to a Python executing environment from [Python/src](./Python/src). Refer to the [Python/README.md](Python/README.md) file for more details. * [MobileNet](./Python/samples/mobilenet.py): Adapted from the [ONNX MobileNet model](https://github.com/onnx/models/tree/master/vision/classification/mobilenet). MobileNet classifies an image into 1000 different classes. It is highly efficient in speed and size, ideal for mobile applications. 
@@ -81,7 +74,9 @@ DirectML Python sample code is available under [Python/samples](./Python/samples * [FNS-Candy](./Python/samples/candy.py): Adapted from the [Windows ML Style Transfer model](https://github.com/microsoft/Windows-Machine-Learning/tree/master/Samples/FNSCandyStyleTransfer) sample, FNS-Candy re-applies specific artistic styles on regular images. * [Super Resolution](./Python/samples/superres.py): Adapted from the [ONNX Super Resolution model](https://github.com/onnx/models/tree/master/vision/super_resolution/sub_pixel_cnn_2016), Super-Res upscales and sharpens the input images to refine the details and improve image quality. -# Support in Machine Learning Frameworks +## DxDispatch Tool + +[DxDispatch](./DxDispatch/README.md) is simple command-line executable for launching DirectX 12 compute programs (including DirectML operators) without writing all the C++ boilerplate. ## Windows ML on DirectML @@ -132,17 +127,7 @@ PyTorch on DirectML is supported on both the latest versions of Windows 10 and t * [torch-directml PyPI project](https://pypi.org/project/torch-directml/) * [PyTorch homepage](https://pytorch.org/) -# Tools for DirectML - -## Olive (Model Optimization) - -[Olive](https://github.com/microsoft/olive) is a model optimization tool that composes industry-leading techniques across model compression, optimization, and compilation. Check out the [Olive and DirectML examples](https://github.com/microsoft/Olive/tree/main/examples/directml) for how you can optimize models to run their best on DirectML. - -## DxDispatch (Testing/Benchmarking) - -[DxDispatch](./DxDispatch/README.md) is simple command-line executable for launching DirectX 12 compute programs (including DirectML operators) without writing all the C++ boilerplate. It's a great tool for debugging and profiling in conjunction with [PIX on Windows](https://devblogs.microsoft.com/pix/introduction/). - -# Feedback +## Feedback We look forward to hearing from you! @@ -154,24 +139,21 @@ We look forward to hearing from you! * For ONNX Runtime issues, please file an issue at [microsoft/onnxruntime](https://github.com/microsoft/onnxruntime/issues). 
-# External Links +## External Links -## Documentation +### Documentation [DirectML programming guide](https://docs.microsoft.com/windows/win32/direct3d12/dml) [DirectML API reference](https://docs.microsoft.com/windows/win32/direct3d12/direct3d-directml-reference) -## More information +### More information [Introducing DirectML (Game Developers Conference '19)](https://www.youtube.com/watch?v=QjQm_wNrvVw) [Accelerating GPU Inferencing with DirectML and DirectX 12 (SIGGRAPH '18)](http://on-demand.gputechconf.com/siggraph/2018/video/sig1814-2-adrian-tsai-gpu-inferencing-directml-and-directx-12.html) [Windows AI: hardware-accelerated ML on Windows devices (Microsoft Build '20)](https://www.youtube.com/watch?v=-qf2PMuOXWI&feature=youtu.be) [Gaming with Windows ML (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/gaming-with-windows-ml/) [DirectML at GDC 2019 (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/directml-at-gdc-2019/) -[DirectX ❤ Linux (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/directx-heart-linux/) -[DirectML at Build 2023](https://devblogs.microsoft.com/directx/directml-at-build-2023/) -[Optimize DirectML performance with Olive](https://devblogs.microsoft.com/directx/optimize-directml-performance-with-olive/) -[DirectML ❤ Stable Diffusion](https://devblogs.microsoft.com/directx/dml-stable-diffusion/) +[DirectX ❤ Linux (DirectX Developer Blog)](https://devblogs.microsoft.com/directx/directx-heart-linux/) -# Contributing +## Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us From 31b7e1d670aaaacf935f98a345e8df467d5bb4c0 Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Fri, 26 May 2023 20:22:55 -0700 Subject: [PATCH 07/11] parse mha op --- DxDispatch/src/model/JsonParsersGenerated.cpp | 77 +++++++++++++++++++ DxDispatch/tools/GenerateParsingHelpers.ps1 | 14 ++-- 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/DxDispatch/src/model/JsonParsersGenerated.cpp b/DxDispatch/src/model/JsonParsersGenerated.cpp index 1d0c78c7..e77bff5c 100644 --- a/DxDispatch/src/model/JsonParsersGenerated.cpp +++ b/DxDispatch/src/model/JsonParsersGenerated.cpp @@ -219,6 +219,7 @@ DML_OPERATOR_TYPE ParseDmlOperatorType(const rapidjson::Value& value) if (!strcmp(valueString, "DML_OPERATOR_RESAMPLE2") || !strcmp(valueString, "RESAMPLE2")) { return DML_OPERATOR_RESAMPLE2; } if (!strcmp(valueString, "DML_OPERATOR_RESAMPLE_GRAD1") || !strcmp(valueString, "RESAMPLE_GRAD1")) { return DML_OPERATOR_RESAMPLE_GRAD1; } if (!strcmp(valueString, "DML_OPERATOR_DIAGONAL_MATRIX1") || !strcmp(valueString, "DIAGONAL_MATRIX1")) { return DML_OPERATOR_DIAGONAL_MATRIX1; } + if (!strcmp(valueString, "DML_OPERATOR_MULTIHEAD_ATTENTION") || !strcmp(valueString, "MULTIHEAD_ATTENTION")) { return DML_OPERATOR_MULTIHEAD_ATTENTION; } throw std::invalid_argument(fmt::format("'{}' is not a recognized value for DML_OPERATOR_TYPE.", valueString)); } @@ -429,6 +430,10 @@ DML_FEATURE_LEVEL ParseDmlFeatureLevel(const rapidjson::Value& value) if (!strcmp(valueString, "DML_FEATURE_LEVEL_4_0") || !strcmp(valueString, "4_0")) { return DML_FEATURE_LEVEL_4_0; } if (!strcmp(valueString, "DML_FEATURE_LEVEL_4_1") || !strcmp(valueString, "4_1")) { return DML_FEATURE_LEVEL_4_1; } if (!strcmp(valueString, "DML_FEATURE_LEVEL_5_0") || !strcmp(valueString, "5_0")) { return DML_FEATURE_LEVEL_5_0; } + if (!strcmp(valueString, 
"DML_FEATURE_LEVEL_5_1") || !strcmp(valueString, "5_1")) { return DML_FEATURE_LEVEL_5_1; } + if (!strcmp(valueString, "DML_FEATURE_LEVEL_5_2") || !strcmp(valueString, "5_2")) { return DML_FEATURE_LEVEL_5_2; } + if (!strcmp(valueString, "DML_FEATURE_LEVEL_6_0") || !strcmp(valueString, "6_0")) { return DML_FEATURE_LEVEL_6_0; } + if (!strcmp(valueString, "DML_FEATURE_LEVEL_6_1") || !strcmp(valueString, "6_1")) { return DML_FEATURE_LEVEL_6_1; } throw std::invalid_argument(fmt::format("'{}' is not a recognized value for DML_FEATURE_LEVEL.", valueString)); } @@ -535,6 +540,28 @@ DML_RANDOM_GENERATOR_TYPE ParseDmlRandomGeneratorTypeField(const rapidjson::Valu }); } +DML_MULTIHEAD_ATTENTION_MASK_TYPE ParseDmlMultiheadAttentionMaskType(const rapidjson::Value& value) +{ + if (value.GetType() != rapidjson::Type::kStringType) + { + throw std::invalid_argument("DML_MULTIHEAD_ATTENTION_MASK_TYPE must be a string."); + } + auto valueString = value.GetString(); + if (!strcmp(valueString, "DML_MULTIHEAD_ATTENTION_MASK_TYPE_NONE") || !strcmp(valueString, "NONE")) { return DML_MULTIHEAD_ATTENTION_MASK_TYPE_NONE; } + if (!strcmp(valueString, "DML_MULTIHEAD_ATTENTION_MASK_TYPE_KEY_SEQUENCE_LENGTH") || !strcmp(valueString, "KEY_SEQUENCE_LENGTH")) { return DML_MULTIHEAD_ATTENTION_MASK_TYPE_KEY_SEQUENCE_LENGTH; } + if (!strcmp(valueString, "DML_MULTIHEAD_ATTENTION_MASK_TYPE_KEY_SEQUENCE_END_START") || !strcmp(valueString, "KEY_SEQUENCE_END_START")) { return DML_MULTIHEAD_ATTENTION_MASK_TYPE_KEY_SEQUENCE_END_START; } + if (!strcmp(valueString, "DML_MULTIHEAD_ATTENTION_MASK_TYPE_KEY_QUERY_SEQUENCE_LENGTH_START_END") || !strcmp(valueString, "KEY_QUERY_SEQUENCE_LENGTH_START_END")) { return DML_MULTIHEAD_ATTENTION_MASK_TYPE_KEY_QUERY_SEQUENCE_LENGTH_START_END; } + if (!strcmp(valueString, "DML_MULTIHEAD_ATTENTION_MASK_TYPE_BOOLEAN") || !strcmp(valueString, "BOOLEAN")) { return DML_MULTIHEAD_ATTENTION_MASK_TYPE_BOOLEAN; } + throw std::invalid_argument(fmt::format("'{}' is not a recognized value for DML_MULTIHEAD_ATTENTION_MASK_TYPE.", valueString)); +} + +DML_MULTIHEAD_ATTENTION_MASK_TYPE ParseDmlMultiheadAttentionMaskTypeField(const rapidjson::Value& object, std::string_view fieldName, bool required, DML_MULTIHEAD_ATTENTION_MASK_TYPE defaultValue) +{ + return ParseFieldHelper(object, fieldName, required, defaultValue, [](auto& value){ + return ParseDmlMultiheadAttentionMaskType(value); + }); +} + // ==================================================================================================== // DIRECTML FLAGS // ==================================================================================================== @@ -3981,6 +4008,54 @@ Model::DmlDispatchableDesc::BindPoints GetBindPoints(const DML_DIAGONAL_MATRIX1_ return bindPoints; } +DML_OPERATOR_DESC* ParseDmlMultiheadAttentionOperatorDesc(const rapidjson::Value& value, bool fused, BucketAllocator& allocator) +{ + if (!value.IsObject()) { throw std::invalid_argument("Expected a valid JSON object."); } + auto desc = allocator.Allocate(); + desc->QueryTensor = fused ? nullptr : ParseDmlTensorDescField(value, "QueryTensor", allocator, false); + desc->KeyTensor = fused ? nullptr : ParseDmlTensorDescField(value, "KeyTensor", allocator, false); + desc->ValueTensor = fused ? nullptr : ParseDmlTensorDescField(value, "ValueTensor", allocator, false); + desc->StackedQueryKeyTensor = fused ? nullptr : ParseDmlTensorDescField(value, "StackedQueryKeyTensor", allocator, false); + desc->StackedKeyValueTensor = fused ? 
nullptr : ParseDmlTensorDescField(value, "StackedKeyValueTensor", allocator, false); + desc->StackedQueryKeyValueTensor = fused ? nullptr : ParseDmlTensorDescField(value, "StackedQueryKeyValueTensor", allocator, false); + desc->BiasTensor = fused ? nullptr : ParseDmlTensorDescField(value, "BiasTensor", allocator, false); + desc->MaskTensor = fused ? nullptr : ParseDmlTensorDescField(value, "MaskTensor", allocator, false); + desc->RelativePositionBiasTensor = fused ? nullptr : ParseDmlTensorDescField(value, "RelativePositionBiasTensor", allocator, false); + desc->PastKeyTensor = fused ? nullptr : ParseDmlTensorDescField(value, "PastKeyTensor", allocator, false); + desc->PastValueTensor = fused ? nullptr : ParseDmlTensorDescField(value, "PastValueTensor", allocator, false); + desc->OutputTensor = fused ? nullptr : ParseDmlTensorDescField(value, "OutputTensor", allocator, true); + desc->OutputPresentKeyTensor = fused ? nullptr : ParseDmlTensorDescField(value, "OutputPresentKeyTensor", allocator, false); + desc->OutputPresentValueTensor = fused ? nullptr : ParseDmlTensorDescField(value, "OutputPresentValueTensor", allocator, false); + desc->Scale = ParseFloat32Field(value, "Scale", true); + desc->MaskFilterValue = ParseFloat32Field(value, "MaskFilterValue", true); + desc->HeadCount = ParseUInt32Field(value, "HeadCount", true); + desc->MaskType = ParseDmlMultiheadAttentionMaskTypeField(value, "MaskType", true, {}); + auto opDesc = allocator.Allocate(); + opDesc->Type = DML_OPERATOR_MULTIHEAD_ATTENTION; + opDesc->Desc = desc; + return opDesc; +} + +Model::DmlDispatchableDesc::BindPoints GetBindPoints(const DML_MULTIHEAD_ATTENTION_OPERATOR_DESC& desc) +{ + Model::DmlDispatchableDesc::BindPoints bindPoints = {}; + bindPoints.inputs.push_back({"QueryTensor", 1, false}); + bindPoints.inputs.push_back({"KeyTensor", 1, false}); + bindPoints.inputs.push_back({"ValueTensor", 1, false}); + bindPoints.inputs.push_back({"StackedQueryKeyTensor", 1, false}); + bindPoints.inputs.push_back({"StackedKeyValueTensor", 1, false}); + bindPoints.inputs.push_back({"StackedQueryKeyValueTensor", 1, false}); + bindPoints.inputs.push_back({"BiasTensor", 1, false}); + bindPoints.inputs.push_back({"MaskTensor", 1, false}); + bindPoints.inputs.push_back({"RelativePositionBiasTensor", 1, false}); + bindPoints.inputs.push_back({"PastKeyTensor", 1, false}); + bindPoints.inputs.push_back({"PastValueTensor", 1, false}); + bindPoints.outputs.push_back({"OutputTensor", 1, true}); + bindPoints.outputs.push_back({"OutputPresentKeyTensor", 1, false}); + bindPoints.outputs.push_back({"OutputPresentValueTensor", 1, false}); + return bindPoints; +} + DML_OPERATOR_DESC* ParseDmlActivationEluOperatorDesc(const rapidjson::Value& value, bool fused, BucketAllocator& allocator) { if (!value.IsObject()) { throw std::invalid_argument("Expected a valid JSON object."); } @@ -4651,6 +4726,7 @@ DML_OPERATOR_DESC* ParseDmlOperatorDesc(const rapidjson::Value& value, bool fuse if (!strcmp(type, "DML_OPERATOR_RESAMPLE2") || !strcmp(type, "RESAMPLE2")) return ParseDmlResample2OperatorDesc(descValue, fused, allocator); if (!strcmp(type, "DML_OPERATOR_RESAMPLE_GRAD1") || !strcmp(type, "RESAMPLE_GRAD1")) return ParseDmlResampleGrad1OperatorDesc(descValue, fused, allocator); if (!strcmp(type, "DML_OPERATOR_DIAGONAL_MATRIX1") || !strcmp(type, "DIAGONAL_MATRIX1")) return ParseDmlDiagonalMatrix1OperatorDesc(descValue, fused, allocator); + if (!strcmp(type, "DML_OPERATOR_MULTIHEAD_ATTENTION") || !strcmp(type, "MULTIHEAD_ATTENTION")) return 
ParseDmlMultiheadAttentionOperatorDesc(descValue, fused, allocator); if (!strcmp(type, "DML_OPERATOR_ACTIVATION_ELU") || !strcmp(type, "ACTIVATION_ELU")) return ParseDmlActivationEluOperatorDesc(descValue, fused, allocator); if (!strcmp(type, "DML_OPERATOR_ACTIVATION_CELU") || !strcmp(type, "ACTIVATION_CELU")) return ParseDmlActivationCeluOperatorDesc(descValue, fused, allocator); if (!strcmp(type, "DML_OPERATOR_ACTIVATION_HARDMAX") || !strcmp(type, "ACTIVATION_HARDMAX")) return ParseDmlActivationHardmaxOperatorDesc(descValue, fused, allocator); @@ -4821,6 +4897,7 @@ Model::DmlDispatchableDesc::BindPoints GetBindPoints(const DML_OPERATOR_DESC& de case DML_OPERATOR_RESAMPLE2: return GetBindPoints(*reinterpret_cast(desc.Desc)); case DML_OPERATOR_RESAMPLE_GRAD1: return GetBindPoints(*reinterpret_cast(desc.Desc)); case DML_OPERATOR_DIAGONAL_MATRIX1: return GetBindPoints(*reinterpret_cast(desc.Desc)); + case DML_OPERATOR_MULTIHEAD_ATTENTION: return GetBindPoints(*reinterpret_cast(desc.Desc)); case DML_OPERATOR_ACTIVATION_ELU: return GetBindPoints(*reinterpret_cast(desc.Desc)); case DML_OPERATOR_ACTIVATION_CELU: return GetBindPoints(*reinterpret_cast(desc.Desc)); case DML_OPERATOR_ACTIVATION_HARDMAX: return GetBindPoints(*reinterpret_cast(desc.Desc)); diff --git a/DxDispatch/tools/GenerateParsingHelpers.ps1 b/DxDispatch/tools/GenerateParsingHelpers.ps1 index 6b334a2b..d03c4553 100644 --- a/DxDispatch/tools/GenerateParsingHelpers.ps1 +++ b/DxDispatch/tools/GenerateParsingHelpers.ps1 @@ -1,7 +1,7 @@ param ( [string]$SchemaFilePath = "$PSScriptRoot\DmlSchema.json", - [string]$MaxFeatureLevel = "5.1" + [string]$MaxFeatureLevel = "6.1" ) function ConvertSnakeToCamelCase($SnakeCaseName) @@ -154,12 +154,11 @@ function WriteOperatorFunction($Operator) { $Cpp += " desc->$($Field.Name) = AsPointer(ParseUInt32ArrayField(value, `"$($Field.Name)`", allocator, $Required));" } - elseif ($Field.Type -eq "operatorDesc") + elseif ($Field.Type -eq "fusedActivationOperatorDesc") { - $Fused = if ($Field.Name -eq 'FusedActivation') { 'true' } else { 'false' } - $Cpp += " desc->$($Field.Name) = ParseDmlOperatorDescField(value, `"$($Field.Name)`", $Fused, allocator, $Required);" + $Cpp += " desc->$($Field.Name) = ParseDmlOperatorDescField(value, `"$($Field.Name)`", true, allocator, $Required);" } - elseif ($Field.Type -eq "operatorDescArray") + elseif ($Field.Type -eq "fusedActivationOperatorDescArray") { $Cpp += " desc->$($Field.Name) = AsPointer(ParseDmlOperatorDescArrayField(value, `"$($Field.Name)`", true, allocator, $Required));" } @@ -237,7 +236,10 @@ $Cpp += "// $('='*100)" $Cpp += "" foreach ($Enum in $Schema.ApiEnums) { - $Cpp += WriteEnumParser $Enum + if (!$Enum.private) + { + $Cpp += WriteEnumParser $Enum + } } $Cpp += "// $('='*100)" From 6dc8f91106c9760dcd38dd4e5dd73102416e06f0 Mon Sep 17 00:00:00 2001 From: Justin Stoecker Date: Fri, 26 May 2023 20:23:22 -0700 Subject: [PATCH 08/11] update version --- DxDispatch/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DxDispatch/CMakeLists.txt b/DxDispatch/CMakeLists.txt index 6fa0d930..f3fcd3dd 100644 --- a/DxDispatch/CMakeLists.txt +++ b/DxDispatch/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18) -project(dxdispatch VERSION 0.15.2 LANGUAGES CXX) +project(dxdispatch VERSION 0.15.3 LANGUAGES CXX) # ============================================================================== # External Libraries/Helpers From b339ffa71859345ab54acaa230504aa879a59003 Mon Sep 17 00:00:00 2001 From: "Tonpe, Gokul N" Date: Tue, 27 
Jun 2023 00:58:06 +0530 Subject: [PATCH 09/11] fixing smaller input values, print only output --- DxDispatch/src/dxdispatch/Executor.cpp | 11 +++++++++-- DxDispatch/src/dxdispatch/OnnxDispatchable.cpp | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/DxDispatch/src/dxdispatch/Executor.cpp b/DxDispatch/src/dxdispatch/Executor.cpp index 8e93fa30..ff356b54 100644 --- a/DxDispatch/src/dxdispatch/Executor.cpp +++ b/DxDispatch/src/dxdispatch/Executor.cpp @@ -357,6 +357,7 @@ std::ostream& operator<<(std::ostream& os, const BufferDataView& view) { uint32_t elementCount = view.desc.initialValues.size() / Device::GetSizeInBytes(view.desc.initialValuesDataType); auto values = reinterpret_cast(view.byteValues.data()); + printf("elementCount=%d\n", elementCount); for (uint32_t elementIndex = 0; elementIndex < elementCount; elementIndex++) { os << values[elementIndex]; @@ -364,6 +365,7 @@ std::ostream& operator<<(std::ostream& os, const BufferDataView& view) { os << ", "; } + } return os; } @@ -399,7 +401,12 @@ void Executor::operator()(const Model::PrintCommand& command) auto outputValues = m_device->Download(resource.Get()); auto& resourceDesc = m_model.GetResource(command.resourceName); auto& bufferDesc = std::get(resourceDesc.value); - LogInfo(fmt::format("Resource '{}': {}", command.resourceName, ToString(outputValues, bufferDesc))); + // print only output tensor + if (command.resourceName == "output") + { + LogInfo(fmt::format("Resource '{}': {}", command.resourceName, ToString(outputValues, bufferDesc))); + } + } catch (const std::exception& e) { @@ -441,7 +448,7 @@ void Executor::operator()(const Model::WriteFileCommand& command) } file.write(reinterpret_cast(fileData.data()), fileData.size()); - LogInfo(fmt::format("Resource '{}' written to '{}'", command.resourceName, command.targetPath)); + //LogInfo(fmt::format("Resource '{}' written to '{}'", command.resourceName, command.targetPath)); } catch (const std::exception& e) { diff --git a/DxDispatch/src/dxdispatch/OnnxDispatchable.cpp b/DxDispatch/src/dxdispatch/OnnxDispatchable.cpp index 22021bb3..38698b95 100644 --- a/DxDispatch/src/dxdispatch/OnnxDispatchable.cpp +++ b/DxDispatch/src/dxdispatch/OnnxDispatchable.cpp @@ -472,9 +472,9 @@ void OnnxDispatchable::Bind(const Bindings& jsonBindings, uint32_t iteration) { for (auto& binding : m_mergedBindings) { - LogInfo(fmt::format("{} Tensor '{}':", (binding.isInput ? "Input" : "Output"), binding.name)); - LogInfo(fmt::format(" Resource = {}", binding.resourceType)); - LogInfo(fmt::format(" Data Type = {}", GetOnnxTensorTypeString(binding.dataType))); + //LogInfo(fmt::format("{} Tensor '{}':", (binding.isInput ? 
"Input" : "Output"), binding.name)); + //LogInfo(fmt::format(" Resource = {}", binding.resourceType)); + //LogInfo(fmt::format(" Data Type = {}", GetOnnxTensorTypeString(binding.dataType))); std::string shapeString = "["; for (size_t i = 0; i < binding.shape.size(); i++) { From 0eeac1526481edb537a90ff2c0409eb3f023cc6e Mon Sep 17 00:00:00 2001 From: "Tonpe, Gokul N" Date: Tue, 27 Jun 2023 01:03:57 +0530 Subject: [PATCH 10/11] limit max input vlaue --- DxDispatch/src/model/JsonParsers.cpp | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/DxDispatch/src/model/JsonParsers.cpp b/DxDispatch/src/model/JsonParsers.cpp index a62923be..c21cfb5d 100644 --- a/DxDispatch/src/model/JsonParsers.cpp +++ b/DxDispatch/src/model/JsonParsers.cpp @@ -1010,6 +1010,14 @@ std::vector GenerateInitialValuesFromConstant(DML_TENSOR_DATA_TYPE da } } +float randf(float minv = 0.0f, float maxv = 1.0f) +{ + if (minv >= maxv) return minv; + + return (float)(rand() / (float)RAND_MAX) * (maxv - minv) + minv; + //return (float)(rand() / (float)RAND_MAX); +}; + std::vector GenerateInitialValuesFromSequence(DML_TENSOR_DATA_TYPE dataType, const rapidjson::Value& object) { auto valueCount = ParseUInt32Field(object, "valueCount"); @@ -1017,10 +1025,13 @@ std::vector GenerateInitialValuesFromSequence(DML_TENSOR_DATA_TYPE da auto AsBytes = [=,&object](auto& parser, auto defaultValue)->std::vector { auto value = parser(object, "valueStart", true, defaultValue); + //auto value = randf(); auto valueDelta = parser(object, "valueDelta", true, defaultValue); std::vector allBytes; allBytes.reserve(sizeof(value) * valueCount); + //allBytes.reserve(sizeof(float) * valueCount); + for (size_t i = 0; i < valueCount; i++) { for (auto byte : gsl::as_bytes(gsl::make_span(&value, 1))) @@ -1028,10 +1039,29 @@ std::vector GenerateInitialValuesFromSequence(DML_TENSOR_DATA_TYPE da allBytes.push_back(byte); } value += valueDelta; + if (value > 3) + { + //reset + value = 0.001; + } + //value = randf(); } + +#if 0 + for (size_t i = 0; i < valueCount; i++) + { + float val = randf(); + for (auto byte : gsl::as_bytes(gsl::make_span(&val,1))) + { + allBytes.push_back(byte); + } + //value += valueDelta; + } +#endif return allBytes; }; + switch (dataType) { case DML_TENSOR_DATA_TYPE_FLOAT16: return AsBytes(ParseFloat16Field, half_float::half(0)); From bc8f6f6682a04b3f8ecc29ab4740b39a4d468f25 Mon Sep 17 00:00:00 2001 From: "Tonpe, Gokul N" Date: Thu, 13 Jul 2023 11:37:39 -0700 Subject: [PATCH 11/11] Random input values enabling --- DxDispatch/src/dxdispatch/Executor.cpp | 4 ++ DxDispatch/src/model/JsonParsers.cpp | 68 +++++++++++++++++--------- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/DxDispatch/src/dxdispatch/Executor.cpp b/DxDispatch/src/dxdispatch/Executor.cpp index ff356b54..255eb209 100644 --- a/DxDispatch/src/dxdispatch/Executor.cpp +++ b/DxDispatch/src/dxdispatch/Executor.cpp @@ -406,6 +406,10 @@ void Executor::operator()(const Model::PrintCommand& command) { LogInfo(fmt::format("Resource '{}': {}", command.resourceName, ToString(outputValues, bufferDesc))); } + if (command.resourceName == "stackedKeyValue") + { + LogInfo(fmt::format("Resource '{}': {}", command.resourceName, ToString(outputValues, bufferDesc))); + } } catch (const std::exception& e) diff --git a/DxDispatch/src/model/JsonParsers.cpp b/DxDispatch/src/model/JsonParsers.cpp index c21cfb5d..9a326bdc 100644 --- a/DxDispatch/src/model/JsonParsers.cpp +++ b/DxDispatch/src/model/JsonParsers.cpp @@ -3,6 +3,8 @@ #include "StdSupport.h" #include 
"NpyReaderWriter.h" +#include + #ifndef WIN32 #define _stricmp strcasecmp #endif @@ -1010,14 +1012,6 @@ std::vector GenerateInitialValuesFromConstant(DML_TENSOR_DATA_TYPE da } } -float randf(float minv = 0.0f, float maxv = 1.0f) -{ - if (minv >= maxv) return minv; - - return (float)(rand() / (float)RAND_MAX) * (maxv - minv) + minv; - //return (float)(rand() / (float)RAND_MAX); -}; - std::vector GenerateInitialValuesFromSequence(DML_TENSOR_DATA_TYPE dataType, const rapidjson::Value& object) { auto valueCount = ParseUInt32Field(object, "valueCount"); @@ -1025,13 +1019,10 @@ std::vector GenerateInitialValuesFromSequence(DML_TENSOR_DATA_TYPE da auto AsBytes = [=,&object](auto& parser, auto defaultValue)->std::vector { auto value = parser(object, "valueStart", true, defaultValue); - //auto value = randf(); auto valueDelta = parser(object, "valueDelta", true, defaultValue); std::vector allBytes; allBytes.reserve(sizeof(value) * valueCount); - //allBytes.reserve(sizeof(float) * valueCount); - for (size_t i = 0; i < valueCount; i++) { for (auto byte : gsl::as_bytes(gsl::make_span(&value, 1))) @@ -1039,29 +1030,55 @@ std::vector GenerateInitialValuesFromSequence(DML_TENSOR_DATA_TYPE da allBytes.push_back(byte); } value += valueDelta; - if (value > 3) - { - //reset - value = 0.001; - } - //value = randf(); } + return allBytes; + }; -#if 0 + switch (dataType) + { + case DML_TENSOR_DATA_TYPE_FLOAT16: return AsBytes(ParseFloat16Field, half_float::half(0)); + case DML_TENSOR_DATA_TYPE_FLOAT32: return AsBytes(ParseFloat32Field, 0.0f); + case DML_TENSOR_DATA_TYPE_FLOAT64: return AsBytes(ParseFloat64Field, 0.0); + case DML_TENSOR_DATA_TYPE_UINT8: return AsBytes(ParseUInt8Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_UINT16: return AsBytes(ParseUInt16Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_UINT32: return AsBytes(ParseUInt32Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_UINT64: return AsBytes(ParseUInt64Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_INT8: return AsBytes(ParseInt8Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_INT16: return AsBytes(ParseInt16Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_INT32: return AsBytes(ParseInt32Field, static_cast(0)); + case DML_TENSOR_DATA_TYPE_INT64: return AsBytes(ParseInt64Field, static_cast(0)); + default: throw std::invalid_argument(fmt::format("Invalid tensor data type.")); + } +} + +std::vector GenerateInitialValuesFromRandom(DML_TENSOR_DATA_TYPE dataType, const rapidjson::Value& object) +{ + auto valueCount = ParseUInt32Field(object, "valueCount"); + auto seed = ParseUInt32Field(object, "seed"); + auto valueMin = ParseFloat32Field(object, "min"); + auto valueMax = ParseFloat32Field(object, "max"); + + // randomize data + std::mt19937 random_generator(seed); // static, create it once! 
+ std::uniform_real_distribution uniform_distribution(valueMin, valueMax); + + auto AsBytes = [&](auto& parser, auto defaultValue)->std::vector + { + + std::vector allBytes; + allBytes.reserve(sizeof(defaultValue) * valueCount); for (size_t i = 0; i < valueCount; i++) { - float val = randf(); - for (auto byte : gsl::as_bytes(gsl::make_span(&val,1))) + const auto f32 = uniform_distribution(random_generator); + const auto value = static_cast(f32); + for (auto byte : gsl::as_bytes(gsl::make_span(&value, 1))) { allBytes.push_back(byte); } - //value += valueDelta; } -#endif return allBytes; }; - switch (dataType) { case DML_TENSOR_DATA_TYPE_FLOAT16: return AsBytes(ParseFloat16Field, half_float::half(0)); @@ -1192,6 +1209,11 @@ Model::BufferDesc ParseModelBufferDesc(const std::filesystem::path& parentPath, ensureInitialValuesDataType(); buffer.initialValues = GenerateInitialValuesFromSequence(buffer.initialValuesDataType, initialValuesField->value); } + else if (initialValuesField->value.HasMember("seed")) + { + ensureInitialValuesDataType(); + buffer.initialValues = GenerateInitialValuesFromRandom(buffer.initialValuesDataType, initialValuesField->value); + } // e.g. "initialValues": { "sourcePath": "inputFile.npy" } else if (initialValuesField->value.HasMember("sourcePath")) {
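Patch 11 above replaces the ad-hoc value clamping introduced in patch 10 with a seeded uniform random initializer, so a buffer in a DxDispatch model can now request random initial values by including a "seed" in its "initialValues" object; the parser dispatches on the presence of "seed", alongside the existing sequence and "sourcePath" forms. A minimal sketch follows; the resource name and the "initialValuesDataType" spelling are taken from existing DxDispatch model conventions rather than from anything introduced in this patch series.

    "query": {
        "initialValuesDataType": "FLOAT32",
        "initialValues": { "valueCount": 1024, "seed": 42, "min": -1.0, "max": 1.0 }
    }

All four fields are read by GenerateInitialValuesFromRandom: "valueCount" sets how many elements are produced, "seed" initializes the std::mt19937 generator, and "min"/"max" bound the std::uniform_real_distribution whose output is cast to the buffer's data type.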
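Stepping back to patch 07 above, the generated parsers now recognize DML_OPERATOR_MULTIHEAD_ATTENTION, the DML_MULTIHEAD_ATTENTION_MASK_TYPE strings, and feature levels up to DML_FEATURE_LEVEL_6_1, so a DxDispatch model can name the operator directly in JSON. A minimal dispatchable entry using only the field names the new parser reads might look like the sketch below; the "type"/"desc" wrapper keys and the "DataType"/"Sizes" tensor-desc spelling follow existing DxDispatch model conventions rather than this patch, and the shapes, scale, and mask settings are illustrative placeholders that still have to satisfy DirectML's own validation for the operator.

    "mha": {
        "type": "DML_OPERATOR_MULTIHEAD_ATTENTION",
        "desc": {
            "QueryTensor":  { "DataType": "FLOAT32", "Sizes": [1, 16, 64] },
            "KeyTensor":    { "DataType": "FLOAT32", "Sizes": [1, 16, 64] },
            "ValueTensor":  { "DataType": "FLOAT32", "Sizes": [1, 16, 64] },
            "OutputTensor": { "DataType": "FLOAT32", "Sizes": [1, 16, 64] },
            "Scale": 0.353553,
            "MaskFilterValue": -10000.0,
            "HeadCount": 8,
            "MaskType": "NONE"
        }
    }

Per the generated parser, OutputTensor, Scale, MaskFilterValue, HeadCount, and MaskType are required; the remaining tensors (the stacked query/key/value variants, bias, mask, relative position bias, and past/present key/value) are optional and can simply be omitted.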