From c7136f03d9c649a749753e97adfa041b128721d1 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 24 Jul 2023 17:16:19 +0800 Subject: [PATCH 1/8] added support for tensor_split parameter as an advanced parameter. --- expose.h | 2 ++ gpttype_adapter.cpp | 15 +++++++++++++++ koboldcpp.py | 17 ++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/expose.h b/expose.h index a3114aeb07dad..d542dfebfb91e 100644 --- a/expose.h +++ b/expose.h @@ -2,6 +2,7 @@ const int stop_token_max = 10; const int ban_token_max = 10; +const int tensor_split_max = 16; // match kobold's sampler list and order enum samplers { @@ -46,6 +47,7 @@ struct load_model_inputs const float rope_freq_scale = 1.0f; const float rope_freq_base = 10000.0f; const char * banned_tokens[ban_token_max]; + const float tensor_split[tensor_split_max]; }; struct generation_inputs { diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 058ba935104b2..4c37dd7e511da 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -475,6 +475,21 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in llama_ctx_params.rope_freq_scale = rope_freq_scale; llama_ctx_params.n_batch = blasbatchsize; + #if defined(GGML_USE_CUBLAS) + bool ts_all_zero = true; + for (int i = 0; i < tensor_split_max; ++i) { + if (inputs.tensor_split[i] != 0.0f) { + ts_all_zero = false; + break; + } + } + if(!ts_all_zero) + { + llama_ctx_params.tensor_split = inputs.tensor_split; + printf("CUBLAS: Applying Custom Tensor Split!\n"); + } + #endif + llama_ctx_v3 = llama_init_from_file(modelname.c_str(), llama_ctx_params); if (llama_ctx_v3 == NULL) diff --git a/koboldcpp.py b/koboldcpp.py index e242bbba5ff34..8b96e8c3ad635 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -14,6 +14,7 @@ stop_token_max = 10 sampler_order_max = 7 ban_token_max = 10 +tensor_split_max = 16 class load_model_inputs(ctypes.Structure): _fields_ = [("threads", ctypes.c_int), @@ 
-38,7 +39,8 @@ class load_model_inputs(ctypes.Structure): ("gpulayers", ctypes.c_int), ("rope_freq_scale", ctypes.c_float), ("rope_freq_base", ctypes.c_float), - ("banned_tokens", ctypes.c_char_p * ban_token_max)] + ("banned_tokens", ctypes.c_char_p * ban_token_max), + ("tensor_split", ctypes.c_float * tensor_split_max)] class generation_inputs(ctypes.Structure): _fields_ = [("seed", ctypes.c_int), @@ -208,6 +210,13 @@ def load_model(model_filename): os.environ["CUDA_VISIBLE_DEVICES"] = "1" elif (args.usecublas and "2" in args.usecublas): os.environ["CUDA_VISIBLE_DEVICES"] = "2" + + for n in range(tensor_split_max): + if args.has_advanced=='advanced' and args.tensor_split and n < len(args.tensor_split): + inputs.tensor_split[n] = float(args.tensor_split[n]) + else: + inputs.tensor_split[n] = 0 + inputs.executable_path = (getdirpath()+"/").encode("UTF-8") inputs.debugmode = args.debugmode banned_tokens = args.bantokens @@ -1634,5 +1643,11 @@ def main(args): compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2']) parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0) + + # for the seldom used esoteric commands + subparsers = parser.add_subparsers(title="Advanced Configs (For Experts)", dest="has_advanced") + advanced_subparser = subparsers.add_parser("advanced", help="Additional settings for experts. 
Run 'koboldcpp.py advanced --help' for more info") + advanced_subparser.add_argument("--tensor_split", help="CUDA with ALL set only. How to split tensors across multiple GPUs, space-separated list of proportions, e.g. 3 1", type=float, nargs='+') + args = parser.parse_args() main(args) From 42f70cb2f6a8089e0a0560a459e4ba317bac4d49 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 24 Jul 2023 12:55:02 +0300 Subject: [PATCH 2/8] Fix scalar version of Q5_K when QK_K = 64 (#2362) Co-authored-by: Iwan Kawrakow --- k_quants.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/k_quants.c b/k_quants.c index c576fd7a7568a..e790abf887dd0 100644 --- a/k_quants.c +++ b/k_quants.c @@ -3297,8 +3297,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri #else - - uint8_t aux8[QK_K]; + int8_t aux8[QK_K]; int16_t aux16[16]; float sums [8]; memset(sums, 0, 8*sizeof(float)); @@ -3308,7 +3307,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri const uint8_t * restrict q4 = x[i].qs; const uint8_t * restrict hm = x[i].qh; const int8_t * restrict q8 = y[i].qs; - uint8_t * restrict a = aux8; + int8_t * restrict a = aux8; for (int l = 0; l < 32; ++l) { a[l+ 0] = q4[l] & 0xF; a[l+32] = q4[l] >> 4; From 825e34baa3bca8f58a6c1dbbb4e538c49970756c Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 24 Jul 2023 17:37:26 +0800 Subject: [PATCH 3/8] default horde name and better handling for horde (+3 squashed commit) Squashed commit: [fadfa60] better idle handling for horde worker [a3971e6] updated lite [2ca2b79] seems to not generate rubbish --- CMakeLists.txt | 2 +- klite.embd | 2 +- koboldcpp.py | 11 ++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0669413a2d31a..7e70f7c198219 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ if (LLAMA_CUBLAS) 
if (LLAMA_CUDA_DMMV_F16) set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics else() - set(CMAKE_CUDA_ARCHITECTURES "37;86") # lowest CUDA 12 standard + lowest for integer intrinsics + set(CMAKE_CUDA_ARCHITECTURES "37;61") # lowest CUDA 12 standard + lowest for integer intrinsics endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") diff --git a/klite.embd b/klite.embd index ec0d75dff24da..3bf8ddc9e28d2 100644 --- a/klite.embd +++ b/klite.embd @@ -26,7 +26,7 @@ Kobold Lite is under the AGPL v3.0 License for the purposes of koboldcpp. Please +/** @license zlib.js 2012 - imaya, The MIT License */function(){"use strict";function l(e){throw e}var r=void 0,t,aa=this;function v(e,t){var n,o=e.split("."),s=aa;!(o[0]in s)&&s.execScript&&s.execScript("var "+o[0]);for(;o.length&&(n=o.shift());)o.length||t===r?s=s[n]?s[n]:s[n]={}:s[n]=t}var y="undefined"!=typeof Uint8Array&&"undefined"!=typeof Uint16Array&&"undefined"!=typeof Uint32Array&&"undefined"!=typeof DataView,z;for(new(y?Uint8Array:Array)(256),z=0;256>z;++z)for(var B=z,ba=7,B=B>>>1;B;B>>>=1)--ba;var 
ca=[0,1996959894,3993919788,2567524794,124634137,1886057615,3915621685,2657392035,249268274,2044508324,3772115230,2547177864,162941995,2125561021,3887607047,2428444049,498536548,1789927666,4089016648,2227061214,450548861,1843258603,4107580753,2211677639,325883990,1684777152,4251122042,2321926636,335633487,1661365465,4195302755,2366115317,997073096,1281953886,3579855332,2724688242,1006888145,1258607687,3524101629,2768942443,901097722,1119000684,3686517206,2898065728,853044451,1172266101,3705015759,2882616665,651767980,1373503546,3369554304,3218104598,565507253,1454621731,3485111705,3099436303,671266974,1594198024,3322730930,2970347812,795835527,1483230225,3244367275,3060149565,1994146192,31158534,2563907772,4023717930,1907459465,112637215,2680153253,3904427059,2013776290,251722036,2517215374,3775830040,2137656763,141376813,2439277719,3865271297,1802195444,476864866,2238001368,4066508878,1812370925,453092731,2181625025,4111451223,1706088902,314042704,2344532202,4240017532,1658658271,366619977,2362670323,4224994405,1303535960,984961486,2747007092,3569037538,1256170817,1037604311,2765210733,3554079995,1131014506,879679996,2909243462,3663771856,1141124467,855842277,2852801631,3708648649,1342533948,654459306,3188396048,3373015174,1466479909,544179635,3110523913,3462522015,1591671054,702138776,2966460450,3352799412,1504918807,783551873,3082640443,3233442989,3988292384,2596254646,62317068,1957810842,3939845945,2647816111,81470997,1943803523,3814918930,2489596804,225274430,2053790376,3826175755,2466906013,167816743,2097651377,4027552580,2265490386,503444072,1762050814,4150417245,2154129355,426522225,1852507879,4275313526,2312317920,282753626,1742555852,4189708143,2394877945,397917763,1622183637,3604390888,2714866558,953729732,1340076626,3518719985,2797360999,1068828381,1219638859,3624741850,2936675148,906185462,1090812512,3747672003,2825379669,829329135,1181335161,3412177804,3160834842,628085408,1382605366,3423369109,3138078467,570562233,1426400815,3317316542,2998733608,7332
39954,1555261956,3268935591,3050360625,752459403,1541320221,2607071920,3965973030,1969922972,40735498,2617837225,3943577151,1913087877,83908371,2512341634,3803740692,2075208622,213261112,2463272603,3855990285,2094854071,198958881,2262029012,4057260610,1759359992,534414190,2176718541,4139329115,1873836001,414664567,2282248934,4279200368,1711684554,285281116,2405801727,4167216745,1634467795,376229701,2685067896,3608007406,1308918612,956543938,2808555105,3495958263,1231636301,1047427035,2932959818,3654703836,1088359270,936918e3,2847714899,3736837829,1202900863,817233897,3183342108,3401237130,1404277552,615818150,3134207493,3453421203,1423857449,601450431,3009837614,3294710456,1567103746,711928724,3020668471,3272380065,1510334235,755167117],C=y?new Uint32Array(ca):ca;if(aa.Uint8Array!==r)try{eval("String.fromCharCode.apply(null, new Uint8Array([0]));")}catch(e){String.fromCharCode.apply=function(e){return function(t,n){return e.call(String.fromCharCode,t,Array.prototype.slice.call(n))}}(String.fromCharCode.apply)}function D(e){var t,n,o,r,s,l,a,i,c,d,m=e.length,u=0,_=Number.POSITIVE_INFINITY;for(i=0;iu&&(u=e[i]),e[i]<_&&(_=e[i]);for(t=1<>=1;for(d=o<<16|i,c=l;cG;G++)switch(!0){case 143>=G:F.push([G+48,8]);break;case 255>=G:F.push([G-144+400,9]);break;case 279>=G:F.push([G-256+0,7]);break;case 287>=G:F.push([G-280+192,8]);break;default:l("invalid literal: "+G)}var fa=function(){function e(e){switch(!0){case 3===e:return[257,e-3,0];case 4===e:return[258,e-4,0];case 5===e:return[259,e-5,0];case 6===e:return[260,e-6,0];case 7===e:return[261,e-7,0];case 8===e:return[262,e-8,0];case 9===e:return[263,e-9,0];case 10===e:return[264,e-10,0];case 12>=e:return[265,e-11,1];case 14>=e:return[266,e-13,1];case 16>=e:return[267,e-15,1];case 18>=e:return[268,e-17,1];case 22>=e:return[269,e-19,2];case 26>=e:return[270,e-23,2];case 30>=e:return[271,e-27,2];case 34>=e:return[272,e-31,2];case 42>=e:return[273,e-35,3];case 50>=e:return[274,e-43,3];case 58>=e:return[275,e-51,3];case 
66>=e:return[276,e-59,3];case 82>=e:return[277,e-67,4];case 98>=e:return[278,e-83,4];case 114>=e:return[279,e-99,4];case 130>=e:return[280,e-115,4];case 162>=e:return[281,e-131,5];case 194>=e:return[282,e-163,5];case 226>=e:return[283,e-195,5];case 257>=e:return[284,e-227,5];case 258===e:return[285,e-258,0];default:l("invalid length: "+e)}}var t,n,o=[];for(t=3;258>=t;t++)n=e(t),o[t]=n[2]<<24|n[1]<<16|n[0];return o}();function I(e,t){switch(this.l=[],this.m=32768,this.d=this.f=this.c=this.t=0,this.input=y?new Uint8Array(e):e,this.u=!1,this.n=J,this.K=!1,!t&&(t={})||(t.index&&(this.c=t.index),t.bufferSize&&(this.m=t.bufferSize),t.bufferType&&(this.n=t.bufferType),t.resize&&(this.K=t.resize)),this.n){case ga:this.a=32768,this.b=new(y?Uint8Array:Array)(32768+this.m+258);break;case J:this.a=0,this.b=new(y?Uint8Array:Array)(this.m),this.e=this.W,this.B=this.R,this.q=this.V;break;default:l(Error("invalid inflate mode"))}}y&&new Uint32Array(fa);var ga=0,J=1;I.prototype.r=function(){for(;!this.u;){var e=K(this,3);switch(1&e&&(this.u=!0),e>>>=1){case 0:var t=this.input,n=this.c,o=this.b,s=this.a,a=t.length,i=r,c=o.length,d=r;switch(this.d=this.f=0,n+1>=a&&l(Error("invalid uncompressed block header: LEN")),i=t[n++]|t[n++]<<8,n+1>=a&&l(Error("invalid uncompressed block header: NLEN")),i===~(t[n++]|t[n++]<<8)&&l(Error("invalid uncompressed block header: length verify")),n+i>t.length&&l(Error("input buffer is broken")),this.n){case ga:for(;s+i>o.length;){if(i-=d=c-s,y)o.set(t.subarray(n,n+d),s),s+=d,n+=d;else for(;d--;)o[s++]=t[n++];this.a=s,o=this.e(),s=this.a}break;case J:for(;s+i>o.length;)o=this.e({H:2});break;default:l(Error("invalid inflate mode"))}if(y)o.set(t.subarray(n,n+i),s),s+=i,n+=i;else for(;i--;)o[s++]=t[n++];this.c=n,this.a=s,this.b=o;break;case 1:this.q(ha,ia);break;case 2:var m,u,_,g,p=K(this,5)+257,h=K(this,5)+1,f=K(this,4)+4,b=new(y?Uint8Array:Array)(L.length),v=r,w=r,A=r,k=r,x=r;for(x=0;x=R?8:255>=R?9:279>=R?7:8;var 
ha=D(Q),S=new(y?Uint8Array:Array)(30),T,ra;for(T=0,ra=S.length;T=i&&l(Error("input buffer is broken")),o|=s[a++]<>>t,e.d=r-t,e.c=a,n}function M(e,t){for(var n,o,r=e.f,s=e.d,a=e.input,i=e.c,c=a.length,d=t[0],m=t[1];s=c);)r|=a[i++]<>>16)>s&&l(Error("invalid code length: "+o)),e.f=r>>o,e.d=s-o,e.c=i,65535&n}function U(e){e=e||{},this.files=[],this.v=e.comment}function V(e,t){t=t||{},this.input=y&&e instanceof Array?new Uint8Array(e):e,this.c=0,this.ba=t.verify||!1,this.j=t.password}t=I.prototype,t.q=function(e,t){var n=this.b,o=this.a;this.C=e;for(var r,s,l,a,i=n.length-258;256!==(r=M(this,e));)if(256>r)o>=i&&(this.a=o,n=this.e(),o=this.a),n[o++]=r;else for(a=la[s=r-257],0=i&&(this.a=o,n=this.e(),o=this.a);a--;)n[o]=n[o++-l];for(;8<=this.d;)this.d-=8,this.c--;this.a=o},t.V=function(e,t){var n=this.b,o=this.a;this.C=e;for(var r,s,l,a,i=n.length;256!==(r=M(this,e));)if(256>r)o>=i&&(i=(n=this.e()).length),n[o++]=r;else for(a=la[s=r-257],0i&&(i=(n=this.e()).length);a--;)n[o]=n[o++-l];for(;8<=this.d;)this.d-=8,this.c--;this.a=o},t.e=function(){var e,t,n=new(y?Uint8Array:Array)(this.a-32768),o=this.a-32768,r=this.b;if(y)n.set(r.subarray(32768,n.length));else for(e=0,t=n.length;ee;++e)r[e]=r[o+e];return this.a=32768,r},t.W=function(e){var t,n,o,r=this.input.length/this.c+1|0,s=this.input,l=this.b;return e&&("number"==typeof e.H&&(r=e.H),"number"==typeof e.P&&(r+=e.P)),2>r?n=(o=(s.length-this.c)/this.C[2]/2*258|0)t&&(this.b.length=t),e=this.b),this.buffer=e},U.prototype.L=function(e){this.j=e},U.prototype.s=function(e){var t=65535&e[2]|2;return t*(1^t)>>8&255},U.prototype.k=function(e,t){e[0]=(C[255&(e[0]^t)]^e[0]>>>8)>>>0,e[1]=1+(6681*(20173*(e[1]+(255&e[0]))>>>0)>>>0)>>>0,e[2]=(C[255&(e[2]^e[1]>>>24)]^e[2]>>>8)>>>0},U.prototype.T=function(e){var t,n,o=[305419896,591751049,878082192];for(y&&(o=new 
Uint32Array(o)),t=0,n=e.length;t>>0,this.z=(e[t++]|e[t++]<<8|e[t++]<<16|e[t++]<<24)>>>0,this.J=(e[t++]|e[t++]<<8|e[t++]<<16|e[t++]<<24)>>>0,this.h=e[t++]|e[t++]<<8,this.g=e[t++]|e[t++]<<8,this.F=e[t++]|e[t++]<<8,this.ea=e[t++]|e[t++]<<8,this.ga=e[t++]|e[t++]<<8,this.fa=e[t++]|e[t++]<<8|e[t++]<<16|e[t++]<<24,this.$=(e[t++]|e[t++]<<8|e[t++]<<16|e[t++]<<24)>>>0,this.filename=String.fromCharCode.apply(null,y?e.subarray(t,t+=this.h):e.slice(t,t+=this.h)),this.X=y?e.subarray(t,t+=this.g):e.slice(t,t+=this.g),this.v=y?e.subarray(t,t+this.F):e.slice(t,t+this.F),this.length=t-this.offset};var va={N:1,ca:8,da:2048};function $(e){var t,n,o,s,a=[],i={};if(!e.i){if(e.o===r){var c,d=e.input;if(!e.D)e:{var m,u=e.input;for(m=u.length-12;0>>0,e.o=(d[c++]|d[c++]<<8|d[c++]<<16|d[c++]<<24)>>>0,e.w=d[c++]|d[c++]<<8,e.v=y?d.subarray(c,c+e.w):d.slice(c,c+e.w)}for(t=e.o,o=0,s=e.aa;o>>0,this.z=(e[t++]|e[t++]<<8|e[t++]<<16|e[t++]<<24)>>>0,this.J=(e[t++]|e[t++]<<8|e[t++]<<16|e[t++]<<24)>>>0,this.h=e[t++]|e[t++]<<8,this.g=e[t++]|e[t++]<<8,this.filename=String.fromCharCode.apply(null,y?e.subarray(t,t+=this.h):e.slice(t,t+=this.h)),this.X=y?e.subarray(t,t+=this.g):e.slice(t,t+=this.g),this.length=t-this.offset},t=V.prototype,t.Y=function(){var e,t,n,o=[];for(this.i||$(this),e=0,t=(n=this.i).length;e>>8^C[255&(h^c[f])];for(b=v>>3;b--;f+=8)h=(h=(h=(h=(h=(h=(h=(h=h>>>8^C[255&(h^c[f])])>>>8^C[255&(h^c[f+1])])>>>8^C[255&(h^c[f+2])])>>>8^C[255&(h^c[f+3])])>>>8^C[255&(h^c[f+4])])>>>8^C[255&(h^c[f+5])])>>>8^C[255&(h^c[f+6])])>>>8^C[255&(h^c[f+7])];d=(4294967295^h)>>>0,s.p!==d&&l(Error("wrong crc: file=0x"+s.p.toString(16)+", data=0x"+d.toString(16)))}return c},t.L=function(e){this.j=e},t.k=U.prototype.k,t.S=U.prototype.T,t.s=U.prototype.s,v("Zlib.Unzip",V),v("Zlib.Unzip.prototype.decompress",V.prototype.r),v("Zlib.Unzip.prototype.getFilenames",V.prototype.Y),v("Zlib.Unzip.prototype.setPassword",V.prototype.L)}.call(this);const 
default_client_agent="KoboldAiLite:17",stablehorde_url="https://stablehorde.net",poll_interval_base_text=500,poll_interval_base_img=3800,poll_interval_background=1e3,text_hordes=[{baseurl:"https://horde.koboldai.net",tag:"🤖",sort_order:1,client_agent:default_client_agent,get perf_endpoint(){return this.baseurl+"/api/v2/status/performance"},get models_endpoint(){return this.baseurl+"/api/v2/status/models?type=text"},get submit_endpoint(){return this.baseurl+"/api/v2/generate/text/async"},get polling_endpoint(){return this.baseurl+"/api/v2/generate/text/status"},get output_endpoint(){return this.baseurl+"/api/v2/generate/text/status"},get worker_endpoint(){return this.baseurl+"/api/v2/workers?type=text"},get finduser_endpoint(){return this.baseurl+"/api/v2/find_user"},get maintenance_endpoint(){return this.baseurl+"/api/v2/workers"}}];function find_text_horde(e){for(let t=0;t({baseurl:e.baseurl,fullurl:e.perf_endpoint}))),models_endpoints=text_hordes.map((e=>({baseurl:e.baseurl,fullurl:e.models_endpoint}))),worker_endpoints=text_hordes.map((e=>({baseurl:e.baseurl,fullurl:e.worker_endpoint}))),finduser_endpoints=text_hordes.map((e=>({baseurl:e.baseurl,fullurl:e.finduser_endpoint}))),stablehorde_submit_endpoint=stablehorde_url+"/api/v2/generate/async",stablehorde_poll_endpoint=stablehorde_url+"/api/v2/generate/check",stablehorde_output_endpoint=stablehorde_url+"/api/v2/generate/status",stablehorde_model_endpoint=stablehorde_url+"/api/v2/status/models",kobold_custom_gen_endpoint="/api/v1/generate/",kobold_custom_mdl_endpoint="/api/v1/model",kobold_custom_version_endpoint="/api/v1/info/version",kobold_custom_maxctxlen_endpoint="/api/v1/config/max_context_length",kobold_custom_genamt_endpoint="/api/v1/config/max_length",koboldcpp_version_endpoint="/api/extra/version",koboldcpp_abort_endpoint="/api/extra/abort",koboldcpp_check_endpoint="/api/extra/generate/check",oai_models_endpoint="/models",oai_submit_endpoint="/completions",oai_submit_endpoint_turbo="/chat/completions",s
cale_submit_endpoint="https://dashboard.scale.com/spellbook/api/v2/deploy/",claude_submit_endpoint="/complete",default_oai_base="https://api.openai.com",default_claude_base="https://api.anthropic.com",news_endpoint="https://news.concedo.workers.dev",horde_news_endpoint="https://hordenews.concedo.workers.dev",cors_proxy="https://proxy.concedo.workers.dev",defaultchatopponent="KoboldAI";var perfdata=null,models_data=[],selected_models=[],worker_data=[],selected_workers=[],gametext_arr=[],redo_arr=[],retry_prev_text="",redo_prev_text="",pending_response_id="",pending_response_horde=text_hordes[0],poll_in_progress=!1,poll_ticks_passed=0,prev_hl_chunk=null,pending_context_preinjection="",current_memory="",current_anote="",current_anotetemplate="[Author's note: <|>]",extrastopseq="",anote_strength=320,current_wi=[],loaded_storyobj=generate_base_storyobj(),generateimagesinterval=600,nextgeneratedimagemilestone=generateimagesinterval,image_db={},completed_imgs_meta={},stablemodels=[],custom_kobold_endpoint="",custom_oai_endpoint=default_oai_base,custom_oai_key="",custom_oai_model="",custom_scale_key="",custom_scale_ID="",custom_claude_endpoint=default_claude_base,custom_claude_key="",custom_claude_model="",uses_cors_proxy=!1,synchro_polled_response=null,synchro_pending_stream="",waiting_for_autosummary=!1,pending_found_story=null,filter_enabled=!0,temp_scenario=null,last_token_budget="",last_known_filename="",localmode=!1,localmodeport=5e3,localmodehost="localhost",kobold_endpoint_version="",koboldcpp_version="",last_request_str="No Requests Available",localsettings={my_api_key:"0000000000",home_cluster:text_hordes[0].baseurl,saved_oai_key:"",saved_oai_addr:"",saved_claude_key:"",saved_claude_addr:"",autoscroll:!0,trimsentences:!0,trimwhitespace:!1,opmode:1,adventure_is_action:!1,adventure_context_mod:!0,chatname:"You",chatopponent:defaultchatopponent,instruct_starttag:"\\n### Instruction:\\n",instruct_endtag:"\\n### 
Response:\\n",instruct_has_markdown:!1,persist_session:!0,speech_synth:0,beep_on:!1,image_styles:"",generate_images:localflag?"":"stable_diffusion",img_autogen:!1,img_allownsfw:!0,save_images:!0,case_sensitive_wi:!1,last_selected_preset:0,enhanced_chat_ui:!0,multiline_replies:!1,allow_continue_chat:!1,idle_responses:0,idle_duration:60,export_settings:!0,invert_colors:!1,max_context_length:1024,max_length:80,auto_ctxlen:!0,auto_genamt:!0,rep_pen:1.1,rep_pen_range:300,rep_pen_slope:.7,temperature:.7,top_p:.92,top_k:0,top_a:0,typ_s:1,tfs_s:1,sampler_order:[6,0,1,3,4,2,5]},defaultsettings=JSON.parse(JSON.stringify(localsettings));const presets=[{preset:"[Default]",description:"Known Working Settings.",temp:defaultsettings.temperature,genamt:defaultsettings.max_length,top_k:defaultsettings.top_k,top_p:defaultsettings.top_p,top_a:defaultsettings.top_a,typical:defaultsettings.typ_s,tfs:defaultsettings.tfs_s,rep_pen:defaultsettings.rep_pen,rep_pen_range:defaultsettings.rep_pen_range,rep_pen_slope:defaultsettings.rep_pen_slope,sampler_order:defaultsettings.sampler_order},{preset:"Inverted Mirror",description:"Good defaults with a different sampler order.",temp:defaultsettings.temperature,genamt:defaultsettings.max_length,top_k:defaultsettings.top_k,top_p:defaultsettings.top_p,top_a:defaultsettings.top_a,typical:defaultsettings.typ_s,tfs:defaultsettings.tfs_s,rep_pen:defaultsettings.rep_pen,rep_pen_range:defaultsettings.rep_pen_range,rep_pen_slope:defaultsettings.rep_pen_slope,sampler_order:[0,1,2,3,4,5,6]},{preset:"Godlike",description:"Makes AI give a descriptive and sensual output.",temp:.7,genamt:80,top_k:0,top_p:.5,top_a:.75,typical:.19,tfs:.97,rep_pen:1.1,rep_pen_range:1024,rep_pen_slope:.7,sampler_order:[6,5,4,3,2,1,0]},{preset:"Mayday",description:"Wacky plot, creativity from AI, crazy stories you want AI to weird 
out.",temp:1.05,genamt:80,top_k:0,top_p:.95,top_a:0,typical:1,tfs:1,rep_pen:1.1,rep_pen_range:1024,rep_pen_slope:.7,sampler_order:[6,0,1,2,3,4,5]},{preset:"Good Winds",description:"Let AI direct the plot, but still stay logical.",temp:.7,genamt:80,top_k:0,top_p:1,top_a:0,typical:1,tfs:.9,rep_pen:1.1,rep_pen_range:1024,rep_pen_slope:.7,sampler_order:[6,0,1,2,3,4,5]},{preset:"Liminal Drift",description:"Drives coherent dialogue, responses, and behavior, sometimes surreal situations arise based on information already present in the story.",temp:.66,genamt:80,top_k:0,top_p:1,top_a:.96,typical:.6,tfs:1,rep_pen:1.1,rep_pen_range:1024,rep_pen_slope:.7,sampler_order:[6,4,5,1,0,2,3]},{preset:"TavernAI",description:"Preset used in TavernAI.",temp:.79,genamt:80,top_k:0,top_p:.9,top_a:0,typical:1,tfs:.95,rep_pen:1.19,rep_pen_range:1024,rep_pen_slope:.9,sampler_order:[6,0,1,2,3,4,5]},{preset:"Storywriter 6B",description:"Optimized settings for relevant output.",genamt:80,rep_pen:1.1,rep_pen_range:2048,rep_pen_slope:.2,sampler_order:[6,5,0,2,3,1,4],temp:.72,tfs:1,top_a:0,top_k:0,top_p:.73,typical:1},{preset:"Coherent Creativity 6B",description:"A good balance between coherence, creativity, and quality of prose.",genamt:80,rep_pen:1.2,rep_pen_range:2048,rep_pen_slope:0,sampler_order:[6,5,0,2,3,1,4],temp:.51,tfs:.99,top_a:0,top_k:0,top_p:1,typical:1},{preset:"Luna Moth 6B",description:"A great degree of creativity without losing coherency.",temp:1.5,genamt:80,top_k:85,top_p:.24,top_a:0,typical:1,tfs:1,rep_pen:1.1,rep_pen_range:2048,rep_pen_slope:0,sampler_order:[6,5,0,2,3,1,4]},{preset:"Best Guess 6B",description:"A subtle change with alternative context settings.",temp:.8,genamt:80,top_k:100,top_p:.9,top_a:0,typical:1,tfs:1,rep_pen:1.15,rep_pen_range:2048,rep_pen_slope:3.4,sampler_order:[6,5,0,2,3,1,4]},{preset:"Pleasing Results 6B",description:"Expectable output with alternative context 
settings.",temp:.44,genamt:80,top_k:0,top_p:1,top_a:0,typical:1,tfs:.9,rep_pen:1.15,rep_pen_range:2048,rep_pen_slope:6.8,sampler_order:[6,5,0,2,3,1,4]},{preset:"Genesis 13B",description:"Stable and logical, but with scattered creativity.",temp:.63,genamt:80,top_k:0,top_p:.98,top_a:0,typical:1,tfs:.98,rep_pen:1.05,rep_pen_range:2048,rep_pen_slope:.1,sampler_order:[6,2,0,3,5,1,4]},{preset:"Basic Coherence 13B",description:"Keep things on track.",temp:.59,genamt:80,top_k:0,top_p:1,top_a:0,typical:1,tfs:.87,rep_pen:1.1,rep_pen_range:2048,rep_pen_slope:.3,sampler_order:[6,5,0,2,3,1,4]},{preset:"Ouroboros 13B",description:"Versatile, conforms well to poems, lists, chat, etc.",temp:1.07,genamt:80,top_k:100,top_p:1,top_a:0,typical:1,tfs:.93,rep_pen:1.05,rep_pen_range:404,rep_pen_slope:.8,sampler_order:[6,0,5,3,2,1,4]},{preset:"Ace of Spades 13B",description:"Expressive, while still staying focused.",temp:1.15,genamt:80,top_k:0,top_p:.95,top_a:0,typical:1,tfs:.8,rep_pen:1.05,rep_pen_range:2048,rep_pen_slope:7,sampler_order:[6,3,2,0,5,1,4]},{preset:"Low Rider 13B",description:"Reliable, aimed at story development.",temp:.94,genamt:80,top_k:12,top_p:1,top_a:0,typical:1,tfs:.94,rep_pen:1.05,rep_pen_range:2048,rep_pen_slope:.2,sampler_order:[6,5,0,2,3,1,4]},{preset:"Pro Writer 13B",description:"Optimal setting for readability, based on AI-powered mass statistical analysis of Euterpe output.",temp:1.35,genamt:80,top_k:0,top_p:1,top_a:0,typical:1,tfs:.69,rep_pen:1.15,rep_pen_range:2048,rep_pen_slope:.1,sampler_order:[6,3,2,5,0,1,4]},{preset:"Default 20B",description:"Good starting settings for NeoX 20B.",temp:.6,genamt:80,top_k:0,top_p:.9,top_a:0,typical:1,tfs:1,rep_pen:1.04,rep_pen_range:1024,rep_pen_slope:.7,sampler_order:[6,0,1,2,3,4,5]}];function init(){for(let e=0;ee.json())).then((e=>{e&&""!=e&&e.newstitle&&e.newstext&&""!=e.newstitle&&""!=e.newstext&&msgbox(e.newstext,e.newstitle,!0,e.nobtns)})).catch((e=>{console.log("Error: 
"+e)})),setupDragDrop(),navigator.userAgent.indexOf("iPhone")>-1&&document.querySelector('meta[name="viewport"]').setAttribute("content","width=device-width, initial-scale=1, maximum-scale=1"),document.getElementById("gametext").addEventListener("paste",(function(e){e.preventDefault();var t=(e.originalEvent||e).clipboardData.getData("text/plain");t=t.replace(/\r?\n/g,"
"),document.execCommand("insertHTML",!1,t)}))}function setupDragDrop(){const e=document.getElementById("gamescreen"),t=document.getElementById("chat_msg_body"),n=function(e){e.preventDefault(),e.stopPropagation();let t=e.dataTransfer.files;console.log(t);let n=0==gametext_arr.length&&""==current_memory&&""==current_anote&&0==current_wi.length&&0==redo_arr.length;t.length>0&&null!=t[0]&&t[0].name&&""!=t[0].name&&(n?load_selected_file(t[0]):msgboxYesNo("Overwrite existing story?","Open File",(()=>{hide_popups(),load_selected_file(t[0])}),(()=>{hide_popups()})))};e.addEventListener("dragover",(e=>{e.preventDefault(),e.stopPropagation()}),!1),t.addEventListener("dragover",(e=>{e.preventDefault(),e.stopPropagation()}),!1),e.addEventListener("drop",(e=>{n(e)}),!1),t.addEventListener("drop",(e=>{n(e)}),!1)}let initial_fetched_kudos=!1;function attempt_connect(e=!0){if(localmode){document.getElementById("customapidropdown").value=0;let e="http://";window.location.protocol.includes("https")&&!is_using_web_lite()&&(e="https://"),document.getElementById("customendpoint").value=e+localmodehost+":"+localmodeport,connect_custom_endpoint(),document.getElementById("lastreq").innerHTML=document.getElementById("lastreq2").innerHTML="You're using Kobold Lite Embedded."}else multifetch(perf_endpoints,((t,n)=>{if(t&&t.length>0){perfdata={queued_requests:0,queued_tokens:0,past_minute_tokens:0,worker_count:0};for(let e=0;ee.title.toLowerCase()==o.trim().toLowerCase()));void 0!==e&&(temp_scenario=e,preview_temp_scenario()),window.history.replaceState(null,null,window.location.pathname)}else e&&display_models()}else msgbox("Failed to connect to KAI Horde!\nPlease check your network connection."),document.body.classList.remove("connected"),document.getElementById("connectstatus").innerHTML="Offline 
Mode",document.getElementById("connectstatus").classList.add("color_orange"),document.getElementById("connectstatus").classList.remove("color_green"),render_gametext()}));localflag||fetch_image_models(),initial_fetched_kudos||localsettings.my_api_key==defaultsettings.my_api_key||(document.getElementById("apikey").value=localsettings.my_api_key,initial_fetched_kudos=!0,fetch_kudo_balance())}var image_models_fetched=!1;function fetch_image_models(e){image_models_fetched||fetch(stablehorde_model_endpoint).then((e=>e.json())).then((t=>{image_models_fetched=!0,stablemodels=[],t=t.sort((function(e,t){return t.count-e.count}));for(var n=0;n{console.log("Error: "+e)}))}function get_cursor_position(){let e=document.getElementById("gametext"),t=0;if(void 0!==window.getSelection){if(0!==window.getSelection().rangeCount){const n=window.getSelection().getRangeAt(0),o=n.cloneRange();o.selectNodeContents(e),o.setEnd(n.endContainer,n.endOffset),t=o.toString().length}}return t}function selectElementContents(e){var t=document.createRange();t.selectNodeContents(e);var n=window.getSelection();n.removeAllRanges(),n.addRange(t)}var timetaken_timestamp=performance.now();function startTimeTaken(){timetaken_timestamp=performance.now()}function getTimeTaken(){return((performance.now()-timetaken_timestamp)/1e3).toFixed(1)}function cyrb_hash(e,t=0){let n=3735928559^t,o=1103547991^t;for(let t,r=0;r>>16,2246822507)^Math.imul(o^o>>>13,3266489909),o=Math.imul(o^o>>>16,2246822507)^Math.imul(n^n>>>13,3266489909),(4294967296*(2097151&o)+(n>>>0)).toString(16).substring(0,6)}function import_props_into_object(e,t){for(var n in t)e[n]=t[n]}function is_using_custom_ep(){return""!=custom_oai_key||""!=custom_kobold_endpoint||""!=custom_scale_key||""!=custom_claude_key}function is_using_newer_kcpp(){return koboldcpp_version&&""!=koboldcpp_version&&compare_version_str(koboldcpp_version,"1.29")>0}function should_use_pseudostreaming(){let 
e=!!document.getElementById("pseudostreaming").checked,t=urlParams.get("streamamount");return is_using_newer_kcpp()&&(null==t||t<=0)&&(e=!1),waiting_for_autosummary&&(e=!1),e}function is_using_web_lite(){return window.location.hostname.includes("koboldai.net")||window.location.hostname.includes("kaihordewebui.github.io")}function get_most_common_cluster(e){let t=e[0].cluster,n={},o=0;for(let r=0;r\]/g,"").replace(/\[<\|d\|.+?\|d\|>\]/g,""))}for(var n={ga:t,md:[]},o=0;o0&&(n.cwi=current_wi),localsettings.export_settings&&(n.savedsettings=JSON.parse(JSON.stringify(localsettings)),n.savedsettings.my_api_key="0000000000",n.savedsettings.home_cluster=text_hordes[0].baseurl,n.savedsettings.saved_oai_key="",n.savedsettings.saved_oai_addr="",n.savedsettings.saved_claude_key="",n.savedsettings.saved_claude_addr="");var r=JSON.stringify(n);return console.log("Exporting story: "+r),buf_to_b64(lz_c.compress(r,1))}function export_share_story(){let e=generate_compressed_story(!0);console.log("Export Len: "+e.length),e.length>=4800?document.getElementById("sharewarning").classList.remove("hidden"):document.getElementById("sharewarning").classList.add("hidden"),document.getElementById("sharecontainer").classList.remove("hidden");let t="https://lite.koboldai.net/?s="+e;document.getElementById("sharestorytext").innerHTML=''+t+""}function copy_share_url(){var e=document.getElementById("sharestorytext");selectElementContents(e),navigator.clipboard.writeText(e.innerText)}function import_share_story(e){console.log("Importing shared story...");var t=!1,n=null;try{var o=lz_d.decompress(b64_to_buf(e));null==o||""==o?t=!0:n=JSON.parse(o)}catch(e){t=!0}if(null==n||t)msgbox("Could not import from URL. 
Is it valid?");else if(console.log("Importing story: "+o),fetch_models((e=>{if(0!=e.length||localmode){if(!localmode){selected_models=[];for(var t=0;te.cluster===selected_models[0].cluster))){let e=get_most_common_cluster(selected_models);selected_models=selected_models.filter((t=>t.cluster===e))}render_gametext()}}else msgbox("No models available. Unable to load.")})),restart_new_game(),gametext_arr=n.ga,migrate_old_images_in_gametext(),n.ca&&""!=n.ca&&(current_anote=n.ca,current_anotetemplate=n.ct),n.cm&&""!=n.cm&&(current_memory=n.cm),n.cwi&&n.cwi.length>0&&(current_wi=n.cwi),n.ess&&""!=n.ess&&(extrastopseq=n.ess),n.savedsettings&&""!=n.savedsettings){let e=localsettings.my_api_key,t=localsettings.home_cluster,o=localsettings.saved_oai_key,r=localsettings.saved_oai_addr,s=localsettings.saved_claude_key,l=localsettings.saved_claude_addr;import_props_into_object(localsettings,n.savedsettings),localsettings.my_api_key=e,localsettings.home_cluster=t,localsettings.saved_oai_key=o,localsettings.saved_oai_addr=r,localsettings.saved_claude_key=s,localsettings.saved_claude_addr=l}}function generate_base_storyobj(){return{gamestarted:!0,prompt:"",memory:"",authorsnote:"",anotetemplate:"",actions:[],actions_metadata:{},worldinfo:[],wifolders_d:{},wifolders_l:[]}}var tempfileurl=null;let newfilename="";function savenowfn(){var e=document.getElementById("tempfile"),t=new Blob([JSON.stringify(loaded_storyobj)],{type:"application/json"});console.log("Normal save handling"),tempfileurl&&window.URL.revokeObjectURL(tempfileurl),tempfileurl=window.URL.createObjectURL(t),e.href=tempfileurl,e.target="_blank",e.download=newfilename,setTimeout((function(){e.click()}),20)}function save_file(){null==loaded_storyobj.file_version||(loaded_storyobj=generate_base_storyobj());let e=gametext_arr;if(!localsettings.save_images){e=[];for(let 
t=0;t\]/g,"").replace(/\[<\|d\|.+?\|d\|>\]/g,""))}loaded_storyobj.prompt="",loaded_storyobj.actions=[],loaded_storyobj.actions_metadata={},e.length>0&&(loaded_storyobj.prompt=e[0]);for(var t=1;tClick to Save

If that does not work, right-click or long press this link, and select (Save As)

',"Save Story",!0)},r.readAsDataURL(o)}else savenowfn()}function load_file(e){let t=e.target;t.files.length>0?(load_selected_file(t.files[0]),document.getElementById("loadfileinput").value=""):console.log("No file to load")}function load_selected_file(e){var t="";e&&(t=e.name);let n=new FileReader;n.onload=function(){let o=n.result;console.log("Load file: "+o);try{let e=JSON.parse(o);null==e.prompt?null!=e.name||null!=e.description||null!=e.personality?load_tavern_obj(e):null!=e.char_name||null!=e.char_persona?load_ooba_obj(e):msgbox("Could not load selected json file. Does not appear to be a KoboldAI story or compatible format."):(kai_json_load(e),t&&""!=t&&(last_known_filename=t))}catch(n){console.log(n);var r=new FileReader;r.onload=function(){var e=r.result,n=new Uint8Array(e),s=convertTavernPng(n);if(null!=s)load_tavern_obj(s);else if(null!=(s=getTavernExifJSON(n)))load_tavern_obj(s);else{try{s=UnzipKAISTORYFile(n)}catch(e){console.log("Unzip failed: "+e),s=null}null!=s?kai_json_load(s):t.endsWith(".txt")?msgboxYesNo('Could not load selected file!
It appears to be invalid or corrupted!

Do you still want to import it as plaintext?',"Loading Failed",(()=>{restart_new_game(),gametext_arr.push(o),hide_popups(),render_gametext()}),(()=>{hide_popups()}),!0):msgbox("Could not load selected file. Is it valid?")}},r.readAsArrayBuffer(e)}},n.readAsText(e)}function kai_json_load(e){restart_new_game();let t=null==(loaded_storyobj=e).file_version;if(console.log("Is oldui: "+t),t){""!=loaded_storyobj.prompt&&gametext_arr.push(loaded_storyobj.prompt);for(var n=0;n{let e=localsettings.my_api_key,t=localsettings.home_cluster,n=localsettings.saved_oai_key,o=localsettings.saved_oai_addr,r=localsettings.saved_claude_key,s=localsettings.saved_claude_addr;import_props_into_object(localsettings,loaded_storyobj.savedsettings),localsettings.my_api_key=e,localsettings.home_cluster=t,localsettings.saved_oai_key=n,localsettings.saved_oai_addr=o,localsettings.saved_claude_key=r,localsettings.saved_claude_addr=s,(1==localsettings.instruct_has_newlines||null!=loaded_storyobj.savedsettings&&null==loaded_storyobj.savedsettings.instruct_has_newlines&&null==loaded_storyobj.savedsettings.instruct_has_markdown)&&(localsettings.instruct_has_newlines=!1,localsettings.instruct_starttag.includes("\\n")||(localsettings.instruct_starttag="\\n"+localsettings.instruct_starttag+"\\n"),localsettings.instruct_endtag.includes("\\n")||(localsettings.instruct_endtag="\\n"+localsettings.instruct_endtag+"\\n")),hide_popups(),render_gametext()}),hide_popups)}else{for(var o in""!=loaded_storyobj.prompt&&gametext_arr.push(loaded_storyobj.prompt),loaded_storyobj.actions.actions){var r=loaded_storyobj.actions.actions[o];gametext_arr.push(r["Selected Text"])}if(loaded_storyobj.authornotetemplate&&(current_anotetemplate=loaded_storyobj.authornotetemplate),loaded_storyobj.authornote&&(current_anote=loaded_storyobj.authornote),loaded_storyobj.memory&&(current_memory=loaded_storyobj.memory),null!=loaded_storyobj.worldinfo_v2&&null!=loaded_storyobj.worldinfo_v2.entries)for(var o in 
loaded_storyobj.worldinfo_v2.entries){if((r=loaded_storyobj.worldinfo_v2.entries[o]).key.length>0&&null!=r.content){let e={key:r.key[0],keysecondary:r.keysecondary.length>0?r.keysecondary[0]:"",content:r.content,comment:r.comment,folder:null,selective:r.selective,constant:r.constant};current_wi.push(e)}}}render_gametext()}function load_tavern_obj(e){console.log("Loading tavern obj"),"chara_card_v2"==e.spec&&null!=e.data&&(e=e.data);let t=e.name?e.name:defaultchatopponent,n=localsettings.chatname&&""!=localsettings.chatname?localsettings.chatname:"You",o=e.description?"Persona: "+e.description:"";o+=e.personality?"\nPersonality: "+e.personality:"";let r=e.scenario?e.scenario:"",s=e.mes_example?e.mes_example:"",l=e.first_mes?e.first_mes:"";o=replaceAll(o,"{{char}}",t,!0),r=replaceAll(r,"{{char}}",t,!0),l=replaceAll(l,"{{char}}",t,!0),s=replaceAll(s,"{{char}}",t,!0),o=replaceAll(o,"{{user}}",n,!0),r=replaceAll(r,"{{user}}",n,!0),l=replaceAll(l,"{{user}}",n,!0),s=replaceAll(s,"{{user}}",n,!0),""!=r&&(r="\n[Scenario: "+r+"]"),""!=s&&(s="\n"+s);let a=o+r+s;if(""==a.trim()&&""==l&&e.entries){console.log("Append Tavern WI"),current_wi=[];for(let t in e.entries){var i=e.entries[t];let n={key:i.key.join(","),keysecondary:i.keysecondary.length>0?i.keysecondary.join(","):"",content:i.content,comment:i.comment,folder:null,selective:i.selective,constant:i.constant};current_wi.push(n)}}else restart_new_game(),localsettings.chatname=n,localsettings.chatopponent=t,gametext_arr.push("\n"+t+": "+l),current_memory=a+"\n",localsettings.opmode=3;render_gametext()}function load_ooba_obj(e){let t=e.char_name?e.char_name:defaultchatopponent,n=localsettings.chatname&&""!=localsettings.chatname?localsettings.chatname:"You",o=e.char_persona?"Persona: 
"+e.char_persona:"",r=e.world_scenario?e.world_scenario:"",s=e.example_dialogue?e.example_dialogue:"",l=e.char_greeting?e.char_greeting:"";o=replaceAll(o,"{{char}}",t),r=replaceAll(r,"{{char}}",t),l=replaceAll(l,"{{char}}",t),s=replaceAll(s,"{{char}}",t),o=replaceAll(o,"{{user}}",n),r=replaceAll(r,"{{user}}",n),l=replaceAll(l,"{{user}}",n),s=replaceAll(s,"{{user}}",n),""!=r&&(r="\n[Scenario: "+r+"]"),""!=s&&(s="\n"+s),restart_new_game(),localsettings.chatname=n,localsettings.chatopponent=t,gametext_arr.push("\n"+t+": "+l),current_memory=o+r+s+"\n",localsettings.opmode=3,render_gametext()}function get_aetherroom_scenario(){inputBox("Enter aetherroom.club prompt URL, or 4-digit prompt number","Import from aetherroom.club","","https://aetherroom.club/1234",(()=>{let e=document.getElementById("inputboxcontainerinput").value.toLowerCase().trim();""==e||(e.includes("aetherroom.club/")&&(e=e.replace("/api/","/"),e=e.split("aetherroom.club/")[1],e=e.split("/")[0],e=e.split("#")[0],e=e.split("?")[0]),""!=e&&isNumeric(e)&&e>0&&e<5e4?fetch(cors_proxy+"?https://aetherroom.club/api/"+e).then((e=>e.json())).then((e=>{if(console.log(e),temp_scenario={title:e.title?e.title:"",desc:e.description?e.description:"",opmode:2,adventure_context_mod:!1,prefmodel1:["nerys","nerybus","skein","adventure","erebus"],prefmodel2:[],prompt:e.promptContent?e.promptContent:"",memory:e.memory?e.memory:"",authorsnote:e.authorsNote?e.authorsNote:"",worldinfo:[]},e.worldInfos)for(let t=0;t{temp_scenario=null,document.getElementById("scenariodesc").innerText="Error: Selected scenario is invalid.",console.log("Error: "+e)})):(temp_scenario=null,document.getElementById("scenariodesc").innerText="Error: User input is invalid\n\n Please ensure you have input a valid aetherroom.club URL or ID (e.g. 
https://aetherroom.club/1234 or just 1234)"))}),!1)}function click_scenario(e){temp_scenario=scenario_db[e],preview_temp_scenario()}function preview_temp_scenario(){let e="";temp_scenario.author&&""!=temp_scenario.author&&(e="
Author: "+temp_scenario.author),document.getElementById("scenariodesc").innerHTML="

"+escapeHtml(temp_scenario.title)+"

Mode: "+(1==temp_scenario.opmode?"Story":2==temp_scenario.opmode?"Adventure":3==temp_scenario.opmode?"Chat":"Instruct")+e+"

"+(""!=temp_scenario.desc?escapeHtml(temp_scenario.desc):"[No Description Given]")+"

"}function complete_load_scenario(){if(console.log("Loading scenario..."),restart_new_game(),gametext_arr=[],""!=temp_scenario.prompt&&gametext_arr.push(temp_scenario.prompt),""!=temp_scenario.authorsnote&&(current_anote=temp_scenario.authorsnote),""!=temp_scenario.memory&&(current_memory=temp_scenario.memory),temp_scenario.worldinfo&&temp_scenario.worldinfo.length>0){current_wi=[];for(let e=0;eDisclaimer: The AI is not suitable to be used as an actual therapist, counselor or advisor of any kind.

\n\t\t\t

While some find it comforting to talk about their issues with an AI, the responses are unpredictable.

\n\t\t\t

When using the AI for real world use-cases such as advice or counseling this means you must be able to understand when an answer is wrong.\n\t\t\tIf you would not trust a random person to pretend to be your advisor; you should definitely not use the AI for this. The models are simply too small and not trained for this purpose.

\n\t\t\t

If you still wish to proceed, please type the phrase I understand in the box below, exactly as written.

\n\t\t\t

If you are experiencing feelings of distress, anxiety, suicidal thoughts, or other forms of mental discomfort, it's best to avoid using AI for non fiction or personal matters as it may exacerbate or encourage these feelings.

\n\t\t\t","AI Safety Warning","","Acknowledgement Required",(()=>{"i understand"==document.getElementById("inputboxcontainerinput").value.toLowerCase().trim()&&confirm_scenario()}),!0)}else confirm_scenario()}function confirm_scenario(){if(null!=temp_scenario){hide_popups();let e=!!document.getElementById("scenarioautopickai").checked,t=!!document.getElementById("scenarioallownsfw").checked;0!=selected_models.length||is_using_custom_ep()||(e=!0),e&&!localmode?fetch_models((e=>{if(0==e.length)msgbox("No models available. Unable to load.");else{let s=["erebus","shinen","horni","litv2","lit-6b"];selected_models=[];for(var n=0;n'+n.title+""}document.getElementById("scenariogrid").innerHTML=e,document.getElementById("scenariodesc").innerText="No Scenario Selected",togglescenarioallownsfw()}function scenario_search(){let e=document.getElementById("scenariogrid"),t=document.getElementById("scenariosearch").value.trim().toLowerCase(),n=document.getElementById("scenariosearchdropdown").value,o=e.children;for(let e=0;e{show_workers(e)}))}function get_workers(e){localmode?e([]):multifetch(worker_endpoints,((t,n)=>{if(t&&t.length>0){let n=[];for(let e=0;e"+c+""),t+=""+c+""+escapeHtml(o.models[0].substring(0,32))+""+o.max_length+" / "+o.max_context_length+"
("+r+" T/s)"+o.uptime+"
("+o.requests_fulfilled+" jobs)"+o.kudos_rewards.toFixed(0)+""+l+""}document.getElementById("workertable").innerHTML=t,document.getElementById("worktitlecount").innerText="Worker List - Total "+e.length}function show_my_own_workers(){let e=lastValidFoundUserData,t=find_text_horde(lastValidFoundCluster);if(lastValidFoundUserWorkers=[],t&&e&&e.worker_ids&&e.worker_ids.length>0){let n=e.worker_ids.map((e=>t.maintenance_endpoint+"/"+e));Promise.all(n.map((e=>fetch(e).then((e=>e.json()))))).then((e=>{lastValidFoundUserWorkers=e,console.log(e),document.getElementById("myownworkercontainer").classList.remove("hidden");let t="";for(var n=0;n"+escapeHtml(o.name.substring(0,32))+""+o.uptime+"
("+o.requests_fulfilled+" jobs)"+o.kudos_rewards.toFixed(0)+""+(o.online?"Online":"Offline")+""}document.getElementById("myownworkertable").innerHTML=t,localsettings.my_api_key=document.getElementById("apikey").value,null!=localsettings.my_api_key&&""!=localsettings.my_api_key||(localsettings.my_api_key=defaultsettings.my_api_key),autosave()})).catch((e=>{console.log("Error: "+e),msgbox(e,"Error fetching my workers")}))}else msgbox("Unable to find my horde workers.","No valid workers found")}function hide_workertable(){document.getElementById("workercontainer").classList.add("hidden"),document.getElementById("myownworkercontainer").classList.add("hidden")}function hide_popups(){document.getElementById("loadmodelcontainer").classList.add("hidden"),document.getElementById("newgamecontainer").classList.add("hidden"),document.getElementById("yesnocontainer").classList.add("hidden"),document.getElementById("settingscontainer").classList.add("hidden"),document.getElementById("msgboxcontainer").classList.add("hidden"),document.getElementById("memorycontainer").classList.add("hidden"),document.getElementById("workercontainer").classList.add("hidden"),document.getElementById("myownworkercontainer").classList.add("hidden"),document.getElementById("sharecontainer").classList.add("hidden"),document.getElementById("wicontainer").classList.add("hidden"),document.getElementById("customendpointcontainer").classList.add("hidden"),document.getElementById("quickstartcontainer").classList.add("hidden"),document.getElementById("zoomedimgcontainer").classList.add("hidden")}function explain_horde(){msgbox('The AI Horde generates text using crowdsourced GPUs by volunteer workers. By default your inputs are not logged, but as Horde workers are open source, they can be modified to do so.

In all cases, the sender will *always be anonymous*, however you are still advised to avoid sending privacy sensitive information.

For any issues, you can find us on discord at https://koboldai.org/discord',"Disclaimer",!0)}var pendingstyle="";function selectStyle(){inputBox("Style tags to use for generating images:\n(E.g. Sketch, Realistic, Anime, 3D Render, Drawing)\n\n","Extra Image Styles",pendingstyle,"Default Style",(()=>{let e=document.getElementById("inputboxcontainerinput").value;pendingstyle=e,console.log("Saved styles: "+pendingstyle)}),!1)}var msgboxOnDone=hide_popups;function hide_msgbox(){document.getElementById("msgboxcontainer").classList.add("hidden")}function msgbox(e,t="Error Encountered",n=!1,o=!1,r=hide_popups){e||(e=""),n?document.getElementById("msgboxtxt").innerHTML=e:document.getElementById("msgboxtxt").innerText=e,document.getElementById("msgboxtitle").innerText=t,document.getElementById("msgboxcontainer").classList.remove("hidden"),1==o?document.getElementById("msgboxbtnok").classList.add("hidden"):document.getElementById("msgboxbtnok").classList.remove("hidden"),msgboxOnDone=r,console.log("Msgbox: "+e)}var onYesFn=null,onNoFn=null;function msgboxYesNo(e,t,n,o,r=!1){e||(e=""),document.getElementById("yesnocontainer").classList.remove("hidden"),document.getElementById("yesnocontainertitle").innerText=t,r?document.getElementById("yesnocontainertext").innerHTML=e:document.getElementById("yesnocontainertext").innerText=e,onYesFn=n,onNoFn=o}var onInputboxOk=null;function inputBox(e,t,n,o,r,s=!1){e||(e=""),t||(t="User Input"),document.getElementById("inputboxcontainer").classList.remove("hidden"),document.getElementById("inputboxcontainertitle").innerText=t,s?document.getElementById("inputboxcontainertext").innerHTML=e:document.getElementById("inputboxcontainertext").innerText=e,document.getElementById("inputboxcontainerinput").value=escapeHtml(n),document.getElementById("inputboxcontainerinput").placeholder=escapeHtml(o),onInputboxOk=function(){document.getElementById("inputboxcontainer").classList.add("hidden"),r()}}function 
togglejailbreak(){document.getElementById("jailbreakprompt").checked?document.getElementById("jailbreakprompttext").classList.remove("hidden"):document.getElementById("jailbreakprompttext").classList.add("hidden")}function customapi_dropdown(){let e=document.getElementById("customapidropdown").value;document.getElementById("oaicustom").classList.add("hidden"),document.getElementById("koboldcustom").classList.add("hidden"),document.getElementById("scalecustom").classList.add("hidden"),document.getElementById("claudecustom").classList.add("hidden"),0==e?document.getElementById("koboldcustom").classList.remove("hidden"):1==e?(document.getElementById("oaicustom").classList.remove("hidden"),document.getElementById("custom_oai_endpoint").value=custom_oai_endpoint,""==custom_oai_key&&""!=localsettings.saved_oai_key&&(document.getElementById("custom_oai_key").value=localsettings.saved_oai_key,""!=localsettings.saved_oai_addr&&(document.getElementById("custom_oai_endpoint").value=localsettings.saved_oai_addr)),togglejailbreak()):2==e?document.getElementById("scalecustom").classList.remove("hidden"):3==e&&(document.getElementById("claudecustom").classList.remove("hidden"),document.getElementById("custom_claude_endpoint").value=custom_claude_endpoint,""==custom_claude_key&&""!=localsettings.saved_claude_key&&(document.getElementById("custom_claude_key").value=localsettings.saved_claude_key,""!=localsettings.saved_claude_addr&&(document.getElementById("custom_claude_endpoint").value=localsettings.saved_claude_addr)))}function connect_custom_endpoint(){custom_kobold_endpoint="",custom_oai_key="",custom_scale_key="";let e=document.getElementById("customapidropdown").value;if(0==e){let e=document.getElementById("customendpoint").value;if(null!=e&&""!=e.trim()){hide_popups(),e=e.trim(),e=e.endsWith("#")?e.slice(0,-1):e,e=e.endsWith("/")?e.slice(0,-1):e;let 
t=[apply_proxy_url(e+kobold_custom_mdl_endpoint),apply_proxy_url(e+kobold_custom_version_endpoint)];Promise.all(t.map((e=>fetch(e).then((e=>e.json()))))).then((t=>{console.log(t);let n=t[0].result,o=t[1].result;n?"ReadOnly"==n?(msgbox("The custom endpoint is working, but no model was loaded.\n\nPlease select and load a model and try again."),selected_models=[],selected_workers=[],custom_kobold_endpoint="",render_gametext()):(custom_kobold_endpoint=e,kobold_endpoint_version=o||"",selected_models=[{performance:100,queued:0,eta:0,name:n,count:1}],selected_workers=[],null==perfdata&&(perfdata={queued_requests:0,queued_tokens:0,past_minute_tokens:0,worker_count:0},document.body.classList.add("connected"),document.getElementById("connectstatus").classList.remove("color_orange"),document.getElementById("connectstatus").classList.add("color_green")),document.getElementById("connectstatus").innerHTML="Connected to Custom Endpoint",render_gametext(),localflag&&fetch(e+koboldcpp_version_endpoint).then((e=>e.json())).then((e=>{e&&""!=e&&e.version&&""!=e.version&&(koboldcpp_version=e.version,console.log("KoboldCpp Detected: "+koboldcpp_version))})).catch((e=>{console.log("Not using KoboldCpp")}))):(msgbox("Error at Custom Kobold Endpoint!\n\nThe custom endpoint failed to respond correctly."),selected_models=[],selected_workers=[],custom_kobold_endpoint="",render_gametext())})).catch((t=>{console.log("Error: "+t);let n=custom_kobold_endpoint.toLowerCase().includes("localhost")||custom_kobold_endpoint.toLowerCase().includes("127.0.0.1")||custom_kobold_endpoint.toLowerCase().includes("192.168.");uses_cors_proxy||n?(msgbox("Failed to connect to Custom Kobold Endpoint!\n\nPlease check if KoboldAI is running at the url: "+e),selected_models=[],selected_workers=[],custom_kobold_endpoint="",render_gametext()):(uses_cors_proxy=!0,connect_custom_endpoint())}))}}else if(1==e){let 
e=document.getElementById("custom_oai_key").value.trim(),t=document.getElementById("custom_oai_endpoint").value.trim();""==t&&(t=document.getElementById("custom_oai_endpoint").value=default_oai_base),""!=t&&"/"==t.slice(-1)&&(t=t.slice(0,-1)),""!=t&&t.length>4&&!t.slice(-4).toLowerCase().includes("/v")&&(t+="/v1"),""!=e&&""!=t&&(hide_popups(),fetch(t+oai_models_endpoint,{method:"GET",headers:{Authorization:"Bearer "+e,"x-api-key":e},referrerPolicy:"no-referrer"}).then((e=>e.json())).then((n=>{console.log(n),!n.error&&n.data&&n.data.length>0?(custom_oai_endpoint=t,custom_oai_key=e,localsettings.saved_oai_key=custom_oai_key,localsettings.saved_oai_addr=custom_oai_endpoint,custom_oai_model=document.getElementById("custom_oai_model").value.trim(),selected_models=[{performance:100,queued:0,eta:0,name:custom_oai_model,count:1}],selected_workers=[],null==perfdata&&(perfdata={queued_requests:0,queued_tokens:0,past_minute_tokens:0,worker_count:0},document.body.classList.add("connected"),document.getElementById("connectstatus").classList.remove("color_orange"),document.getElementById("connectstatus").classList.add("color_green")),document.getElementById("connectstatus").innerHTML="Connected to OAI Endpoint",render_gametext()):(custom_oai_endpoint=default_oai_base,custom_oai_key="",msgbox(JSON.stringify(n.error.message)))})).catch((e=>{console.log("Error: "+e),custom_oai_endpoint=default_oai_base,custom_oai_key="",msgbox("Error: "+e)})))}else if(2==e){let e=document.getElementById("custom_scale_key").value.trim(),t=document.getElementById("custom_scale_ID").value.trim();t=t.split("#")[0],t=t.split("?")[0],!t.includes("dashboard.scale.com/spellbook/api/v2/deploy/")||25!=e.length||e.includes(" ")||e.includes("/")?(t="",e="",msgbox("Invalid inputs, please try again.")):t=t.split("dashboard.scale.com/spellbook/api/v2/deploy/")[1],""!=e&&""!=t&&(hide_popups(),fetch(cors_proxy+"?"+scale_submit_endpoint+t,{method:"GET",headers:{Authorization:"Bearer 
"+e},referrerPolicy:"no-referrer"}).then((e=>e.json())).then((n=>{console.log(n),n.message&&""!=n.message?(custom_scale_key=e,custom_scale_ID=t,selected_models=[{performance:100,queued:0,eta:0,name:"SpellbookScaleAI",count:1}],selected_workers=[],null==perfdata&&(perfdata={queued_requests:0,queued_tokens:0,past_minute_tokens:0,worker_count:0},document.body.classList.add("connected"),document.getElementById("connectstatus").classList.remove("color_orange"),document.getElementById("connectstatus").classList.add("color_green")),document.getElementById("connectstatus").innerHTML="Connected to ScaleAI Endpoint",render_gametext()):(custom_scale_key="",msgbox("Cannot connect to Spellbook by ScaleAI"))})).catch((e=>{console.log("Error: "+e),custom_scale_key="",msgbox("Error: "+e)})))}else if(3==e){let e=document.getElementById("custom_claude_key").value.trim(),t=document.getElementById("custom_claude_endpoint").value.trim();""==t&&(t=document.getElementById("custom_claude_endpoint").value=default_claude_base),""!=t&&"/"==t.slice(-1)&&(t=t.slice(0,-1)),""!=t&&t.length>4&&!t.slice(-4).toLowerCase().includes("/v")&&(t+="/v1"),""!=e&&""!=t&&(hide_popups(),custom_claude_endpoint=t,custom_claude_key=e,localsettings.saved_claude_key=custom_claude_key,localsettings.saved_claude_addr=custom_claude_endpoint,custom_claude_model=document.getElementById("custom_claude_model").value.trim(),document.getElementById("clauderenamecompat").checked&&(localsettings.instruct_starttag="Human:",localsettings.chatname="Human",localsettings.instruct_endtag="Assistant:",localsettings.chatopponent="Assistant"),selected_models=[{performance:100,queued:0,eta:0,name:custom_claude_model,count:1}],selected_workers=[],null==perfdata&&(perfdata={queued_requests:0,queued_tokens:0,past_minute_tokens:0,worker_count:0},document.body.classList.add("connected"),document.getElementById("connectstatus").classList.remove("color_orange"),document.getElementById("connectstatus").classList.add("color_green")),document.g
etElementById("connectstatus").innerHTML="Connected to Claude Endpoint",render_gametext())}}function display_custom_endpoint(){document.getElementById("customendpointcontainer").classList.remove("hidden")}function fetch_models(e){localmode?e(selected_models):multifetch(models_endpoints,((t,n)=>{if(t&&t.length>0){let n=[];for(let e=0;e0?n.models[0]:"None",r=n.name,s=find_text_horde(n.cluster),l=s&&""!=s.tag?s.tag+" ":"",a=n.trusted?'style="color:#b700ff;"':"";a=n.maintenance_mode?'style="color:#ee4444;"':a;let i=n.trusted?" 💜":"";i=n.maintenance_mode?" ⛔":i,e+=""}document.getElementById("pickedmodel").innerHTML=e}else{let e="";for(let t=0;te.cluster==n.cluster&&e.name==n.name)).length>0?" selected":"",l=parseFloat(n.performance);if(!l||isNaN(l)||l>=99999){let e=worker_data.filter((e=>e.cluster==n.cluster&&e.models.includes(n.name)));if(e.length>0){l=0;for(let t=0;t"+r+escapeHtml(n.name)+" (ETA: "+n.eta+"s, Queue: "+n.queued+", Speed: "+l+", Qty: "+n.count+")"}e+='',document.getElementById("pickedmodel").innerHTML=e}}fetch_models((e=>{models_data=e,t=!0,t&&n&&r()})),get_workers((e=>{worker_data=e,n=!0,t&&n&&r()}))}function confirm_models(){let e=Array.from(document.getElementById("pickedmodel").selectedOptions).map((({value:e})=>e));if(1==e.length&&9999==e[0])hide_popups(),display_custom_endpoint();else{custom_kobold_endpoint="",custom_oai_key="",custom_scale_key="";const o=e.indexOf("9999");if(o>-1&&e.splice(o,1),e.length>0){let o=[],r=[],s=!!document.getElementById("manualworker").checked;for(var t=0;te.name==l[n]&&e.cluster==s.cluster));o.includes(e)||o.push(e)}}else{let n=models_data[e[t]];o.push(n)}o=o.filter((e=>e)),r=r.filter((e=>e));const l=o.every((e=>e.cluster===o[0].cluster)),a=r.every((e=>e.cluster===r[0].cluster));if(!l||!a)if(r.length>0){let e=get_most_common_cluster(r);r=r.filter((t=>t.cluster===e)),o=o.filter((t=>t.cluster===e))}else{let 
e=get_most_common_cluster(o);o=o.filter((t=>t.cluster===e))}selected_models=o,selected_workers=r,localsettings.my_api_key=document.getElementById("apikey").value,null!=localsettings.my_api_key&&""!=localsettings.my_api_key||(localsettings.my_api_key=defaultsettings.my_api_key),null!=desired_new_home_cluster&&(localsettings.home_cluster=desired_new_home_cluster,desired_new_home_cluster=null),document.getElementById("connectstatus").innerHTML="Connected to KoboldAI Horde",render_gametext(),hide_popups(),l&&a||msgbox("You've selected multiple workers from different clusters. Only one cluster will be used.","Caution")}}}function update_my_workers(){let e=document.getElementById("apikey").value,t=find_text_horde(lastValidFoundCluster);for(var n=0;ne.json())).then((e=>{msgbox(JSON.stringify(e),"Update My Worker")})).catch((e=>{console.error("Error:",e)}))}}}let desired_new_home_cluster=null,lastValidFoundUserData=null,lastValidFoundCluster=null,lastValidFoundUserWorkers=[];function fetch_kudo_balance(){if(localmode)return;desired_new_home_cluster=null;let e=document.getElementById("apikey").value;if(null!=e&&""!=e.trim()){document.getElementById("kudos_bal").innerHTML="Checking...
 ";let t={method:"GET",headers:{apikey:e}};multifetch(finduser_endpoints.map((e=>[e,t])),((e,t)=>{if(e&&e.length>0){lastValidFoundUserData=null,lastValidFoundCluster="";for(let t=0;t";e<0?(document.getElementById("kudos_bal").innerHTML=o+r+"
Kudos Balance: 0","anonymous#0"==t.toLowerCase()&&(document.getElementById("kudos_bal").innerHTML=o+t+"
(Register New User)")):document.getElementById("kudos_bal").innerHTML=o+r+"
Kudos Balance: "+e}else document.getElementById("kudos_bal").innerHTML="API Key Error
(Register New User)"}else console.log("Error: "+t),document.getElementById("kudos_bal").innerHTML="API Key Error
(Register New User)"}))}}function focus_api_keys(){var e=document.getElementById("apikey");e&&"password"===e.type&&(e.type="text"),(e=document.getElementById("custom_oai_key"))&&"password"===e.type&&(e.type="text"),(e=document.getElementById("custom_claude_key"))&&"password"===e.type&&(e.type="text")}function blur_api_keys(){var e=document.getElementById("apikey");e&&"text"===e.type&&(e.type="password"),(e=document.getElementById("custom_oai_key"))&&"text"===e.type&&(e.type="password"),(e=document.getElementById("custom_claude_key"))&&"text"===e.type&&(e.type="password")}function display_settings(){document.getElementById("settingscontainer").classList.remove("hidden"),document.getElementById("max_context_length").value=document.getElementById("max_context_length_slide").value=localsettings.max_context_length,document.getElementById("max_length").value=document.getElementById("max_length_slide").value=localsettings.max_length,document.getElementById("temperature").value=document.getElementById("temperature_slide").value=localsettings.temperature,document.getElementById("rep_pen").value=document.getElementById("rep_pen_slide").value=localsettings.rep_pen,document.getElementById("rep_pen_slope").value=localsettings.rep_pen_slope,document.getElementById("rep_pen_range").value=localsettings.rep_pen_range,document.getElementById("top_p").value=document.getElementById("top_p_slide").value=localsettings.top_p,document.getElementById("autoscroll").checked=localsettings.autoscroll,document.getElementById("export_settings").checked=localsettings.export_settings,document.getElementById("invert_colors").checked=localsettings.invert_colors,document.getElementById("trimsentences").checked=localsettings.trimsentences,document.getElementById("trimwhitespace").checked=localsettings.trimwhitespace,document.getElementById("persist_session").checked=localsettings.persist_session,document.getElementById("opmode").value=localsettings.opmode,document.getElementById("chatname").value=local
settings.chatname,document.getElementById("chatopponent").value=localsettings.chatopponent,document.getElementById("instruct_starttag").value=localsettings.instruct_starttag,document.getElementById("instruct_endtag").value=localsettings.instruct_endtag,document.getElementById("top_k").value=localsettings.top_k,document.getElementById("top_a").value=localsettings.top_a,document.getElementById("typ_s").value=localsettings.typ_s,document.getElementById("tfs_s").value=localsettings.tfs_s,document.getElementById("generate_images").value=localsettings.generate_images,document.getElementById("enhanced_chat_ui").checked=localsettings.enhanced_chat_ui,document.getElementById("multiline_replies").checked=localsettings.multiline_replies,document.getElementById("allow_continue_chat").checked=localsettings.allow_continue_chat,document.getElementById("idle_responses").value=localsettings.idle_responses,document.getElementById("idle_duration").value=localsettings.idle_duration,document.getElementById("adventure_context_mod").checked=localsettings.adventure_context_mod,document.getElementById("instruct_has_markdown").checked=localsettings.instruct_has_markdown,document.getElementById("auto_ctxlen").checked=localsettings.auto_ctxlen,document.getElementById("auto_genamt").checked=localsettings.auto_genamt,pendingstyle=localsettings.image_styles;let e=localsettings.sampler_order.toString();document.getElementById("sampler_order").value=e;let t="";for(var n=0;n'+presets[n].preset+"";t+='',document.getElementById("presets").innerHTML=t,document.getElementById("presets").value=localsettings.last_selected_preset;var o='';if("speechSynthesis"in window){let e=window.speechSynthesis.getVoices();console.log("speech synth available: "+e.length);for(n=0;n'+e[n].name+""}else console.log("No speech synth 
available");document.getElementById("ttsselect").innerHTML=o,document.getElementById("ttsselect").value=localsettings.speech_synth,document.getElementById("beep_on").checked=localsettings.beep_on,toggle_opmode();let r="";for(n=0;n';document.getElementById("sdmodels").innerHTML=r,document.getElementById("img_autogen").checked=localsettings.img_autogen,document.getElementById("save_images").checked=localsettings.save_images,document.getElementById("img_allownsfw").checked=localsettings.img_allownsfw}function toggle_preset(){let e=document.getElementById("presets").value,t=presets[e];t&&(temp_changingpreset=!0,document.getElementById("temperature").value=document.getElementById("temperature_slide").value=t.temp,document.getElementById("max_length").value=document.getElementById("max_length_slide").value=t.genamt,document.getElementById("top_k").value=t.top_k,document.getElementById("top_p").value=document.getElementById("top_p_slide").value=t.top_p,document.getElementById("top_a").value=t.top_a,document.getElementById("typ_s").value=t.typical,document.getElementById("tfs_s").value=t.tfs,document.getElementById("rep_pen").value=document.getElementById("rep_pen_slide").value=t.rep_pen,document.getElementById("rep_pen_range").value=t.rep_pen_range,document.getElementById("rep_pen_slope").value=t.rep_pen_slope,document.getElementById("sampler_order").value=t.sampler_order.toString())}function validate_sd_model(){var e=document.getElementById("generate_images").value;let t=!1;for(var n=0;n{pending_response_id="-1",waiting_for_autosummary=!0;let e=Math.floor(3.35*localsettings.max_context_length)-100,t=concat_gametext(!0,"");t=end_trim_to_sentence(t,!0),t.substring(t.length-e);let n=t.length>1800;t+="\n### Instruction:Summarize the above text in a single paragraph of up to "+(n?"ten":"five")+" detailed sentences.\n### Response:";let 
o={prompt:t,params:{n:1,max_context_length:localsettings.max_context_length,max_length:n?200:150,rep_pen:localsettings.rep_pen,temperature:localsettings.temperature,top_p:localsettings.top_p,top_k:localsettings.top_k,top_a:localsettings.top_a,typical:localsettings.typ_s,tfs:localsettings.tfs_s,rep_pen_range:localsettings.rep_pen_range,rep_pen_slope:localsettings.rep_pen_slope,sampler_order:localsettings.sampler_order},models:selected_models.map((e=>e.name))};o.workers=selected_workers.map((e=>e.id)),dispatch_submit_generation(o),render_gametext(),document.getElementById("memorytext").value="[<|Generating summary, do not close window...|>]"};0==gametext_arr.length||1==gametext_arr.length&&""==gametext_arr[0].trim()?console.log("Cannot summarize nothing."):""!=temp_automem_store.trim()?msgboxYesNo("This will modify existing memory. Proceed?","Confirm Modify",(()=>{document.getElementById("yesnocontainer").classList.add("hidden"),e()}),(()=>{document.getElementById("yesnocontainer").classList.add("hidden")})):e()}function handle_incoming_autosummary(e){waiting_for_autosummary=!1;let t=(e=replaceAll(e=(e=e.trim()).split("###")[0],"\n\n","\n")).split("\n"),n=200;if((e=t[0]).length<100&&t.length>1)for(var o=1;o5&&(e+="\n"+t[o]),!(n<=0));++o);e=end_trim_to_sentence(e,!0),""==temp_automem_store.trim()?document.getElementById("memorytext").value="[Summary: "+e+"]":document.getElementById("memorytext").value=temp_automem_store+"\n\n[Summary Continued: "+e+"]"}function clear_poll_flags(){pending_response_id="",poll_in_progress=!1,synchro_polled_response=null,synchro_pending_stream="",waiting_for_autosummary=!1}function restart_new_game(){idle_timer=0,gametext_arr=[],redo_arr=[],last_request_str="No Requests 
Available",retry_prev_text="",redo_prev_text="",nextgeneratedimagemilestone=generateimagesinterval,pending_response_id="",synchro_polled_response=null,synchro_pending_stream="",waiting_for_autosummary=!1,current_memory="",current_anote="",current_wi=[],pending_context_preinjection="",current_anotetemplate="[Author's note: <|>]",loaded_storyobj=generate_base_storyobj(),document.getElementById("input_text").value="",document.getElementById("cht_inp").value="",image_db={},completed_imgs_meta={},localsettings.adventure_is_action=!1,prev_hl_chunk=null,last_token_budget="",last_known_filename="",render_gametext()}function btn_editmode(){gametext_arr.length>0&&(document.getElementById("allowediting").checked=!0,toggle_editable())}function toggle_editable(){0==gametext_arr.length?selected_models.length>0||selected_workers.length>0?document.getElementById("allowediting").checked&&gametext_arr.push(""):document.getElementById("allowediting").checked=!1:1==gametext_arr.length&&""==gametext_arr[0]&&gametext_arr.pop(),render_gametext()}function end_trim_to_sentence(e,t=!1){let n=-1,o=[".","!","?","`","*",'"',")","}","`","]",";"];for(let t=0;t0?e.substring(0,n+1).trimEnd():e.trimEnd()}function start_trim_to_sentence(e){let t=e.indexOf("."),n=e.indexOf("!"),o=e.indexOf("?"),r=e.indexOf("\n"),s=t,l=!1;return n>0&&n0&&o0&&r0?l?e.substring(s+1):e.substring(s+2):e}function handle_typing(e){var t=(e=e||window.event).keyCode||e.which;if(!e.shiftKey&&13==t){let t=!!document.getElementById("entersubmit").checked,n=""!=document.getElementById("input_text").value;t&&(e.preventDefault(),n&&!document.getElementById("btnsend").disabled&&submit_generation())}}function show_abort_button(e){e?(document.getElementById("abortgen").classList.remove("hidden"),document.getElementById("chat_msg_send_btn_abort").classList.remove("hidden")):(document.getElementById("abortgen").classList.add("hidden"),document.getElementById("chat_msg_send_btn_abort").classList.add("hidden"))}function 
abort_generation(){let e=pending_response_id;if(is_using_custom_ep()&&""!=pending_response_id&&""!=synchro_pending_stream&&(synchro_polled_response=synchro_pending_stream,poll_in_progress=!1,poll_pending_response()),console.log("Generation "+pending_response_id+" aborted"),clear_poll_flags(),render_gametext(),pending_response_horde&&e&&""!=e&&!is_using_custom_ep()){let t=pending_response_horde.output_endpoint+"/"+e;fetch(t,{method:"DELETE"}).then((e=>e.json())).then((e=>{console.log(e)})).catch((e=>{console.error("Error:",e)}))}else is_using_newer_kcpp()&&fetch(custom_kobold_endpoint+koboldcpp_abort_endpoint,{method:"POST",headers:{"Content-Type":"application/json"}}).then((e=>e.json())).then((e=>{})).catch((e=>{console.error("Error:",e)}));show_abort_button(!1)}function manual_gen_image(){let e=concat_gametext(!0,"");var t=e.length;if(t>0){var n=e.substring(t-300,t);(n=end_trim_to_sentence(n=start_trim_to_sentence(n),!0)).length>0&&(generate_new_image(n),nextgeneratedimagemilestone=t+generateimagesinterval,document.getElementById("btn_genimg").disabled=!0,document.getElementById("btn_genimg2").disabled=!0,setTimeout((()=>{document.getElementById("btn_genimg").disabled=!1,document.getElementById("btn_genimg2").disabled=!1}),1e4))}}function submit_generation(){let e=document.getElementById("input_text").value,t=!1;if(""!=e.trim()||gametext_arr.length>0||""!=current_memory||""!=current_anote){if(waiting_for_autosummary=!1,idle_timer=0,idle_triggered_counter=0,localsettings.speech_synth>0&&"speechSynthesis"in window){let t=new window.SpeechSynthesisUtterance(e);t.voice=window.speechSynthesis.getVoices()[localsettings.speech_synth-1],window.speechSynthesis.speak(t)}4==localsettings.opmode&&""!=e&&(e=get_instruct_starttag(!1)+e+get_instruct_endtag(!1)),3==localsettings.opmode&&""!=e?e="\n"+localsettings.chatname+": "+e:3==localsettings.opmode&&""==e.trim()&&(e=""),2==localsettings.opmode&&""!=e&&localsettings.adventure_is_action&&(e="\n\n> 
"+e+"\n\n"),2==localsettings.opmode&&""!=e&&0==gametext_arr.length&&(localsettings.adventure_is_action||(localsettings.adventure_is_action=!0,""==current_memory.trim()&&(t=!0))),""!=e&&gametext_arr.push(e),redo_arr=[],retry_prev_text="",redo_prev_text="",document.getElementById("input_text").value="",pending_response_id="-1";let l=document.getElementById("maintxtloader");if(l){l.classList.remove("greenloader"),l.classList.remove("redloader");let e=document.getElementById("outerloadernum");e&&(e.innerText="")}let a=localsettings.max_context_length,i=localsettings.max_length;if(!is_using_custom_ep()&&(localsettings.auto_genamt||localsettings.auto_ctxlen)){let e=selected_workers;if((null==e||0==e.length)&&selected_models&&selected_models.length>0){e=[];for(let t=0;te&&""!=e)),e=e.map((e=>e.trim())),t=e[Math.floor(Math.random()*e.length)],r=e.length>1}let s=localsettings.chatname;null!=t&&""!=t||(t="");var n=new RegExp("\n(?!"+localsettings.chatname+").+?: ","gi"),o=(current_memory+current_anote+d).match(n);if(""==t&&null!=o&&o.length>0){let e=o[0].replace(": ","");e=e.trim(),""!=e&&(t=e)}if(0==current_anote.length&&0==current_memory.length&&gametext_arr.length>0&&gametext_arr[0].startsWith("\n"+s+": ")){let e="[The following is an interesting chat message log between "+s+" and "+t+".]\n\n"+localsettings.chatname+": Hi.\n"+t+": Hello.";""==t&&(e="[The following is an interesting chat message log between "+s+" and someone else.]\n\n"+localsettings.chatname+": Hi."),r&&(e="[The following is an interesting chat message log between "+s+" and multiple others.]\n\n"+localsettings.chatname+": 
Hi."),d=e+d}""!=t?(t=replaceAll(t,"\n",""),pending_context_preinjection="\n"+t+":"):pending_context_preinjection="\n",localsettings.allow_continue_chat&&""==e.trim()?pending_context_preinjection="":d+=pending_context_preinjection}if(""!=localsettings.generate_images&&3!=localsettings.opmode&&4!=localsettings.opmode&&localsettings.img_autogen)if(2==localsettings.opmode){if(e.startsWith("\n\n> "))generate_new_image(start_trim_to_sentence(d.substring(r-200,r)))}else{var r=d.length;if(r>nextgeneratedimagemilestone)nextgeneratedimagemilestone=r+generateimagesinterval,generate_new_image(end_trim_to_sentence(start_trim_to_sentence(d.substring(r-300,r)),!0))}let m=Math.floor(.9*c),u=current_memory.substring(current_memory.length-m);null!=u&&""!=u&&(u+="\n");let _=d;if(localsettings.case_sensitive_wi||(_=_.toLowerCase()),current_wi.length>0)for(var s=0;s_.includes(e.trim()))):n.some((e=>_.includes(e.trim().toLowerCase())));else{let t=e.keysecondary.split(",");if(localsettings.case_sensitive_wi){let e=n.some((e=>_.includes(e.trim()))),r=t.some((e=>_.includes(e.trim())));o=e&&r}else{let e=n.some((e=>_.includes(e.trim().toLowerCase()))),r=t.some((e=>_.includes(e.trim().toLowerCase())));o=e&&r}}o&&(u+=e.content+"\n")}let g=current_anotetemplate.replace("<|>",current_anote);if(g=g.substring(g.length-m),0==current_anote.length&&(g=""),u.length>0||current_anote.length>0){d=d.substring(d.length-c);let e=u.length+d.length+g.length-c;d=d.substring(e);let t=anote_strength,n=d.length-t;for(let e=0;e<10&&(n>=0&&ne.name))};p.workers=selected_workers.map((e=>e.id)),t?pending_response_id="":dispatch_submit_generation(p),render_gametext()}}function dispatch_submit_generation(e){if(console.log(e),last_request_str=JSON.stringify(e),startTimeTaken(),is_using_custom_ep())if(console.log("submit custom api"),pending_response_id="submit-v1-dummy-id",poll_ticks_passed=0,poll_in_progress=!1,synchro_polled_response=null,synchro_pending_stream="",""!=custom_kobold_endpoint){let 
t=e.prompt;(e=e.params).prompt=t;let n=!!document.getElementById("remoteconsolelog").checked;if(e.quiet=!n,kobold_endpoint_version&&""!=kobold_endpoint_version&&compare_version_str(kobold_endpoint_version,"1.2.1")>0){if(2==localsettings.opmode&&(e.stop_sequence=["\n> "]),3==localsettings.opmode&&(e.stop_sequence=[localsettings.chatname+":","\n"+localsettings.chatname+" "],localsettings.chatopponent.includes("||$||"))){let t=localsettings.chatopponent.split("||$||");t=t.filter((e=>e&&""!=e)),t=t.map((e=>e.trim()));for(let n=0;n0&&!e.stop_sequence&&(e.stop_sequence=[]);for(let n=0;n0&&(s=r),kobold_api_stream(apply_proxy_url(custom_kobold_endpoint+kobold_custom_gen_endpoint),e,e.max_length,"",o?s:4096)}else if(""!=custom_oai_key){let t=custom_oai_endpoint+oai_submit_endpoint,n=e.params.rep_pen-1,o={max_tokens:e.params.max_length,model:custom_oai_model,presence_penalty:n,temperature:e.params.temperature,top_p:e.params.top_p,logit_bias:{50256:-100}};"gpt-3.5-turbo"==custom_oai_model||"gpt-3.5-turbo-16k"==custom_oai_model||"gpt-4"==custom_oai_model||"gpt-4-32k"==custom_oai_model?(t=custom_oai_endpoint+oai_submit_endpoint_turbo,document.getElementById("jailbreakprompt")&&document.getElementById("jailbreakprompt").checked&&""!=document.getElementById("jailbreakprompttext").value?o.messages=[{role:"system",content:document.getElementById("jailbreakprompttext").value},{role:"user",content:e.prompt}]:o.messages=[{role:"user",content:e.prompt}]):o.prompt=e.prompt,fetch(t,{method:"POST",headers:{"Content-Type":"application/json","x-api-key":custom_oai_key,Authorization:"Bearer "+custom_oai_key},body:JSON.stringify(o),referrerPolicy:"no-referrer"}).then((e=>e.json())).then((e=>{if(console.log("sync finished response: "+JSON.stringify(e)),""!=custom_oai_key&&null!=e.choices&&e.choices.length>0){let t=e.choices[0];t.text?synchro_polled_response=t.text:t.message?synchro_polled_response=t.message.content:(console.error("Error, unknown OAI 
response"),clear_poll_flags(),render_gametext(),msgbox("Error, unknown OAI response"))}else console.error("error occurred in OAI generation"),clear_poll_flags(),render_gametext(),msgbox("Error occurred during text generation: "+formatError(e))})).catch((e=>{console.error("Error:",e),clear_poll_flags(),render_gametext(),msgbox("Error while submitting prompt: "+e)}))}else if(""!=custom_scale_key){let t=cors_proxy+"?"+scale_submit_endpoint+custom_scale_ID,n={input:{input:e.prompt}};fetch(t,{method:"POST",headers:{"Content-Type":"application/json",Authorization:"Basic "+custom_scale_key},body:JSON.stringify(n),referrerPolicy:"no-referrer"}).then((e=>e.json())).then((e=>{console.log("sync finished response: "+JSON.stringify(e)),""!=custom_scale_key&&null!=e.output&&""!=e.output?synchro_polled_response=e.output:(console.error("error occurred in Scale generation"),clear_poll_flags(),render_gametext(),msgbox("Error occurred during text generation: "+formatError(e)))})).catch((e=>{console.error("Error:",e),clear_poll_flags(),render_gametext(),msgbox("Error while submitting prompt: "+e)}))}else if(""!=custom_claude_key){let t=cors_proxy+"?"+(custom_claude_endpoint+claude_submit_endpoint),n={prompt:e.prompt,max_tokens_to_sample:e.params.max_length,model:custom_claude_model,top_k:e.params.top_k<=0?-1:e.params.top_k,temperature:e.params.temperature,top_p:e.params.top_p};fetch(t,{method:"POST",headers:{"Content-Type":"application/json","x-api-key":custom_claude_key,Authorization:"Bearer "+custom_claude_key},body:JSON.stringify(n),referrerPolicy:"no-referrer"}).then((e=>e.json())).then((e=>{console.log("sync finished response: "+JSON.stringify(e)),""!=custom_claude_key&&null!=e.completion&&""!=e.completion?synchro_polled_response=e.completion:(console.error("error occurred in Claude generation"),clear_poll_flags(),render_gametext(),msgbox("Error occurred during text generation: 
"+formatError(e)))})).catch((e=>{console.error("Error:",e),clear_poll_flags(),render_gametext(),msgbox("Error while submitting prompt: "+e)}))}else console.log("Unknown sync endpoint!");else{console.log("submit v2 api");let t=find_text_horde(localsettings.home_cluster);if(selected_workers.length>0){const e=selected_workers.filter((e=>e.cluster==localsettings.home_cluster)),n=selected_workers.filter((e=>e.cluster!=localsettings.home_cluster));if(0==e.length&&n.length>0){let e=find_text_horde(n[0].cluster);e&&(t=e)}}else if(selected_models.length>0){const e=selected_models.filter((e=>e.cluster==localsettings.home_cluster)),n=selected_models.filter((e=>e.cluster!=localsettings.home_cluster));if(0==e.length&&n.length>0){let e=find_text_horde(n[0].cluster);e&&(t=e)}}let n=t.baseurl==localsettings.home_cluster?localsettings.my_api_key:defaultsettings.my_api_key,o=t.client_agent,r={"Content-Type":"application/json",apikey:n};null!=o&&(r["Client-Agent"]=o),fetch(t.submit_endpoint,{method:"POST",headers:r,body:JSON.stringify(e)}).then((e=>e.json())).then((e=>{console.log("Success:",e),e.id&&""!=e.id?(pending_response_id=e.id,pending_response_horde=t,poll_ticks_passed=0,console.log("awaiting response for "+pending_response_id)):(clear_poll_flags(),render_gametext(),""!=e.message?msgbox(e.message):msgbox("Unspecified error while submitting prompt"))})).catch((e=>{console.error("Error:",e),clear_poll_flags(),render_gametext(),msgbox("Error while submitting prompt: "+e)}))}}function 
sanitize_horde_image_prompt(e){if(null==e||""==e)return"";return(e=(e=(e=(e=(e=e.replace(/\b(girl)\b/gim,"woman")).replace(/\b(boy)\b/gim,"man")).replace(/\b(girls)\b/gim,"women")).replace(/\b(boys)\b/gim,"men")).replace(/\b(under.age|under.aged|underage|underaged|loli|pedo|pedophile|(\w+).year.old|(\w+).years.old|minor|prepubescent|minors|shota)\b/gim,"")).match(/\b(cock|ahegao|hentai|uncensored|lewd|cocks|deepthroat|deepthroating|dick|dicks|cumshot|lesbian|fuck|fucked|fucking|sperm|naked|nipples|tits|boobs|breasts|boob|breast|topless|ass|butt|fingering|masturbate|masturbating|bitch|blowjob|pussy|piss|asshole|dildo|dildos|vibrator|erection|foreskin|handjob|nude|penis|porn|vibrator|virgin|vagina|vulva|threesome|orgy|bdsm|hickey|condom|testicles|anal|bareback|bukkake|creampie|stripper|strap-on|missionary|clitoris|clit|clitty|cowgirl|fleshlight|sex|buttplug|milf|oral|sucking|bondage|orgasm|scissoring|railed|slut|sluts|slutty|cumming|cunt|faggot|sissy|anal|anus|cum|semen|scat|nsfw|xxx|explicit|erotic|horny|aroused|jizz|moan|rape|raped|raping|throbbing|humping)\b/gim)&&(e=(e=e.replace(/\b(youngster|infant|baby|toddler|child|teen|kid|kiddie|kiddo|teenager|student|preteen|pre.teen)\b/gim,"person")).replace(/\b(young|younger|youthful|youth|small|smaller|smallest|girly|boyish|lil|tiny|teenaged|lit[tl]le|school.aged|school|highschool|kindergarten|teens|children|kids)\b/gim,"")),e}function generate_new_image(e){localsettings.image_styles&&""!=localsettings.image_styles&&(e=localsettings.image_styles+" "+e),filter_enabled&&(e=sanitize_horde_image_prompt(e)),console.log("Generating image for: "+e);let t=[];t="*"==localsettings.generate_images?[]:[localsettings.generate_images];let n={prompt:e+" ### disfigured, ugly, deformed, poorly, censor, censored, blurry, lowres, fused, malformed, watermark, misshapen, duplicated, grainy, distorted, 
signature",params:{cfg_scale:7,sampler_name:"k_euler_a",height:512,width:512,steps:20,karras:!1,n:1,seed:"",post_processing:[]},models:t,nsfw:!!localsettings.img_allownsfw,censor_nsfw:!localsettings.img_allownsfw,trusted_workers:!1,replacement_filter:!0,r2:!1};fetch(stablehorde_submit_endpoint,{method:"POST",headers:{"Content-Type":"application/json","Client-Agent":default_client_agent,apikey:localsettings.my_api_key},body:JSON.stringify(n)}).then((e=>e.json())).then((t=>{if(console.log("genimg result:",t),t.id&&""!=t.id){let n="[<|p|"+t.id+"|p|>]";gametext_arr.push(n),image_db[t.id]={done:!1,queue:"Starting",result:"",alt:e},console.log("New image queued "+n)}else msgbox("Image generation failed: "+t.message)})).catch((e=>{console.error("Error:",e),msgbox("Image generation error: "+e)}))}function click_image(e){if(e){document.getElementById("zoomedimgcontainer").classList.remove("hidden"),document.getElementById("zoomedimg").src=e.src;let t=e.title;t&&""!=t?(t=replaceAll(t,"
"," "),document.getElementById("zoomedimgdesc").innerText=t):document.getElementById("zoomedimgdesc").innerText="No Saved Description"}}function delete_curr_image(){let e=document.getElementById("zoomedimg").src;if(e&&""!=e){var t="[<|d|"+e+"|d|>]";for(let e=0;e'}{let e="Unavailable";if(null!=image_db[t]){let n=image_db[t].queue;s=image_db[t].alt?escapeHtml(image_db[t].alt):"",e=0==n?"Generating":"Starting"==n?n:"Queue: "+n}else console.log("Cannot render "+t);return'
'+t+'
'+e+"
"}}function handle_incoming_text(e,t,n,o){if(""!=extrastopseq){let t=replaceAll(extrastopseq,"\\n","\n").split("||$||");if(t.length>0)for(let n=0;n ")&&(t=e.split("\n> "),e=t[0])}if(3==localsettings.opmode){let t=e.indexOf(localsettings.chatname+":"),n=e.indexOf("\n"+localsettings.chatname+" "),o=[];if(-1!=t)o=e.split(localsettings.chatname+":");else if(-1!=n)o=e.split("\n"+localsettings.chatname+" ");else if(localsettings.multiline_replies)o.push(e);else if(0==e.indexOf('"')&&e.indexOf('"',1)>0){let t=e.indexOf('"',1);o.push(e.substring(0,t+1))}else o=e.split("\n");let r=o[0];r.length>0&&"\n"==r[r.length-1]&&(r=r.substring(0,r.length-1)),e=r}if(4==localsettings.opmode){let t=get_instruct_starttag(!0),n=get_instruct_endtag(!0),o=e.indexOf(t),r=[];-1!=o&&(r=e.split(t),e=r[0]),o=e.indexOf(n),r=[],-1!=o&&(r=e.split(n),e=r[0])}if(""!=pending_context_preinjection&&(""!=e&&" "!=e[0]&&3==localsettings.opmode&&(e=" "+e),e=pending_context_preinjection+e,pending_context_preinjection=""),localsettings.speech_synth>0&&"speechSynthesis"in window){let t=new window.SpeechSynthesisUtterance(e);t.voice=window.speechSynthesis.getVoices()[localsettings.speech_synth-1],window.speechSynthesis.speak(t)}""!=e&&gametext_arr.push(e),localsettings.beep_on&&playbeep();let r='Last request served by '+t+' using '+n+" for "+o+" kudos in "+getTimeTaken()+" seconds.";document.getElementById("lastreq").innerHTML=r,document.getElementById("lastreq2").innerHTML=r}function poll_image_db(){console.log("polling for pending images "+Object.keys(image_db).length);for(let e in image_db){let t=image_db[e];0==t.done&&fetch(stablehorde_poll_endpoint+"/"+e).then((e=>e.json())).then((n=>{console.log("pollimg result:",n),1==n.faulted||0==n.is_possible?(msgbox("Pending image generation could not complete."),console.log("removing from images: "+e),delete image_db[e]):1==n.done?(t.done=!0,fetch(stablehorde_output_endpoint+"/"+e).then((e=>e.json())).then((n=>{if(console.log("finalimg recv for 
"+e),1==n.faulted||0==n.is_possible)msgbox("Pending image generation could not complete."),console.log("removing from images: "+e),delete image_db[e];else{t.queue=0,compressImage("data:image/jpeg;base64,"+n.generations[0].img,(e=>{t.result=e}))}})).catch((t=>{console.error("Error:",t),msgbox("Image poll error: "+t),delete image_db[e]}))):t.queue=null==n.queue_position?"Error":n.queue_position})).catch((t=>{console.error("Error:",t),msgbox("Image poll error: "+t),delete image_db[e]}))}let e=!1;for(var t=0;t\]/.test(gametext_arr[t]))for(let n in image_db){let o=image_db[n],r="[<|p|"+n+"|p|>]";if(gametext_arr[t].includes(r)&&(e=!0,1==o.done&&""!=o.result)){let e="[<|d|"+o.result+"|d|>]";console.log("Replacing with Image: "+r),gametext_arr[t]=gametext_arr[t].replace(r,e),completed_imgs_meta[cyrb_hash(o.result)]={alt:image_db[n].alt},delete image_db[n]}}e&&document.activeElement!=document.getElementById("gametext")&&render_gametext()}function compressImage(e,t){let n=document.createElement("img");n.onload=function(){var e=document.createElement("canvas"),n=e.getContext("2d");e.width=256,e.height=256,n.drawImage(this,0,0,256,256);var o=e.toDataURL("image/jpeg",.33);t(o)},n.src=e}var idle_timer=0,idle_triggered_counter=0;function poll_background_tasks(){let e=1e3*localsettings.idle_duration,t=""==document.getElementById("input_text").value,n=""==document.getElementById("cht_inp").value;if((1==localsettings.opmode||3==localsettings.opmode)&&localsettings.idle_responses>0&&t&&n&&!document.getElementById("btnsend").disabled&&idle_triggered_countere){idle_timer=0;let e=++idle_triggered_counter;submit_generation(),idle_triggered_counter=e}console.log("Idling: "+idle_timer+", "+idle_triggered_counter)}else idle_timer=0}function 
poll_pending_response(){if(++poll_ticks_passed,is_using_custom_ep()||poll_ticks_passed%3==0)if(show_abort_button(!1),pending_response_id&&"-1"!=pending_response_id&&""!=pending_response_id)if(poll_ticks_passed>4/(.001*poll_interval_base_text)&&show_abort_button(!0),poll_in_progress)console.log("Polling still in progress for id: "+pending_response_id);else if(is_using_custom_ep())if(poll_in_progress=!0,null==synchro_polled_response){console.log("v1 still awaiting reply");let e=should_use_pseudostreaming();!!!document.getElementById("pseudostreaming").checked||e||waiting_for_autosummary||poll_ticks_passed%2!=0?poll_in_progress=!1:fetch(custom_kobold_endpoint+koboldcpp_check_endpoint,{method:"POST",headers:{"Content-Type":"application/json"}}).then((e=>e.json())).then((e=>{e&&null!=e.results&&e.results.length>0&&e.results[0].text&&pending_response_id&&""!=pending_response_id&&(synchro_pending_stream=e.results[0].text,render_gametext()),poll_in_progress=!1})).catch((e=>{console.error("Error:",e),poll_in_progress=!1}))}else{console.log("v1 handle recv reply"),pending_response_id="",poll_in_progress=!1;let e=synchro_polled_response;if(null!=e&&""!=e){let t=e,n="Custom Endpoint",o="0",r=selected_models.length>0?selected_models[0].name:"Unknown Model";waiting_for_autosummary?handle_incoming_autosummary(t):handle_incoming_text(t,n,r,o)}synchro_polled_response=null,synchro_pending_stream="",show_abort_button(!1),render_gametext()}else console.log("v2 Polling started for pending id: "+pending_response_id),poll_in_progress=!0,fetch(pending_response_horde.polling_endpoint+"/"+pending_response_id).then((e=>e.json())).then((e=>{if(null!=e.message||1==e.faulted||0==e.is_possible){console.log("Gave up on failed attempt"),clear_poll_flags(),render_gametext(),show_abort_button(!1);let t="Error encountered during text generation!\n";null!=e.message&&(t+=e.message),1==e.faulted&&(t+="Fault encountered during text generation."),0==e.is_possible&&(t+="No workers were able to generate 
text with your request."),msgbox(t)}else if(1==e.done)setTimeout((()=>{console.log("fetching completed generation for "+pending_response_id),fetch(pending_response_horde.output_endpoint+"/"+pending_response_id).then((e=>e.json())).then((e=>{if(console.log("Finished "+pending_response_id+": "+JSON.stringify(e)),pending_response_id="",poll_in_progress=!1,null!=e.generations&&e.generations.length>0){let t=e.generations[0].text,n=e.generations[0].worker_name,o=e.generations[0].model,r=e.kudos;waiting_for_autosummary?handle_incoming_autosummary(t):handle_incoming_text(t,n,o,r)}render_gametext(),show_abort_button(!1)})).catch((e=>{console.error("Error:",e),clear_poll_flags(),render_gametext(),show_abort_button(!1),msgbox("Error encountered during text generation!")}))}),500);else{poll_in_progress=!1;let t=document.getElementById("maintxtloader");if(t){t.classList.remove("greenloader"),t.classList.remove("redloader"),e.queue_position>0?t.classList.add("redloader"):1==e.processing&&0==e.queue_position&&t.classList.add("greenloader");let n=document.getElementById("outerloadernum");n&&(n.innerText=0==e.queue_position?"":e.queue_position)}console.log("Still awaiting "+pending_response_id+": "+JSON.stringify(e))}})).catch((e=>{console.error("Error:",e),clear_poll_flags(),render_gametext(),show_abort_button(!1),msgbox("Error encountered during text generation!")}));else console.log("Nothing to update: "+pending_response_id)}function click_gametext(){if(document.getElementById("allowediting").checked){if(void 0!==window.getSelection){const e=window.getSelection();null!=e.focusNode&&null!=e.focusNode.parentElement&&e.focusNode.parentElement.classList.contains("txtchunk")&&(null!=prev_hl_chunk&&prev_hl_chunk.classList.remove("hlchunk"),(prev_hl_chunk=e.focusNode.parentElement).classList.add("hlchunk")),idle_timer=0}}}function merge_edit_field(){if(gametext_arr.length>0&&document.getElementById("allowediting").checked){let 
t=concat_gametext(!0,"\n","",""),n=document.getElementById("gametext");if(t!=n.innerText){gametext_arr=[],redo_arr=[],retry_prev_text="",redo_prev_text="",n.querySelectorAll("div.storyimg,div.storyimgfloat").forEach((e=>{let t=e.getElementsByTagName("img")[0];e.replaceWith(null==t.alt||""==t.alt?"[<|d|"+t.src+"|d|>]":"[<|p|"+t.alt+"|p|>]")}));let t=[];n.querySelectorAll("span.txtchunk").forEach((e=>{t.push(e.innerText)})),n.innerHTML=n.innerHTML.replace(/(.+?)<\/span>/g,"$2"),n.innerHTML=n.innerHTML.replace(/(.+?)<\/span>/g,"$2"),n.innerHTML=replaceAll(n.innerHTML,"



","


"),n.innerHTML=replaceAll(n.innerHTML,"


","

"),n.innerHTML=replaceAll(n.innerHTML,"

","
");let o=n.innerText,r="";if(t.length>1){let e=t[t.length-1].length;e>0&&(r=o.slice(-e),o=o.slice(0,-e))}if(o.length>0){let t="\n";o.includes("\n\n")&&(t="\n\n");let n=o.split(t);for(var e=0;e0&&"\n"==gametext_arr[gametext_arr.length-1]?gametext_arr[gametext_arr.length-1]+=r:gametext_arr.push(r)),render_gametext(),console.log("Merged edit field. Parts:"+gametext_arr.length)}null!=prev_hl_chunk&&(prev_hl_chunk.classList.remove("hlchunk"),prev_hl_chunk=null)}}function concat_gametext(e=!1,t="",n="",o="",r=!1){let s="";for(let e=0;e\]/g,t),s=s.replace(/\[<\|d\|.+?\|d\|>\]/g,t),s=s.replace(/\[<\|.+?\|>\]/g,"")),s}function migrate_old_images_in_gametext(){let e=concat_gametext(!1,"","","",!1);if(!/\[<\|p\|.+?\|p\|>\]/.test(e)&&!/\[<\|d\|.+?\|d\|>\]/.test(e)&&(/<\|p\|.+?\|p\|>/.test(e)||/<\|d\|.+?\|d\|>/.test(e))){console.log("Migrating old images from saved story");for(let e=0;e/g,(function(e){return"["+e+"]"})),gametext_arr[e]=gametext_arr[e].replace(/<\|d\|.+?\|d\|>/g,(function(e){return"["+e+"]"}))}}function 
render_gametext(e=!1){if(document.getElementById("gametext").contentEditable=document.getElementById("allowediting").checked&&""==pending_response_id,2==localsettings.opmode?(document.getElementById("inputrow").classList.add("show_mode"),localsettings.adventure_is_action?(document.getElementById("adventure_mode_txt").innerText="Action",document.getElementById("adventure_mode_img").classList.add("input_action"),document.getElementById("adventure_mode_img").classList.remove("input_story")):(document.getElementById("adventure_mode_txt").innerText="Story",document.getElementById("adventure_mode_img").classList.remove("input_action"),document.getElementById("adventure_mode_img").classList.add("input_story")),document.getElementById("btnmode").classList.remove("hidden")):(document.getElementById("inputrow").classList.remove("show_mode"),document.getElementById("btnmode").classList.add("hidden")),0!=gametext_arr.length||""!=synchro_pending_stream&&""!=pending_response_id){let e="";if(e=document.getElementById("allowediting").checked?concat_gametext(!1,"",'',"",!0):concat_gametext(!1,"","","",!0),""!=synchro_pending_stream&&(e+=''+escapeHtml(pending_context_preinjection)+synchro_pending_stream+""),4!=localsettings.opmode||document.getElementById("allowediting").checked)e=replaceAll(e,get_instruct_starttag(!0),''+escapeHtml(get_instruct_starttag(!0))+""),e=replaceAll(e,get_instruct_endtag(!0),''+escapeHtml(get_instruct_endtag(!0))+"");else{if(e=replaceAll(e,"\n\n"+get_instruct_starttag(!0)+"\n\n","%SpcStg%"),e=replaceAll(e,"\n\n"+get_instruct_endtag(!0)+"\n\n","%SpcEtg%"),e=replaceAll(e,"\n"+get_instruct_starttag(!0)+"\n","%SpcStg%"),e=replaceAll(e,"\n"+get_instruct_endtag(!0)+"\n","%SpcEtg%"),e=replaceAll(e,get_instruct_starttag(!1),"%SpcStg%"),e=replaceAll(e,get_instruct_endtag(!1),"%SpcEtg%"),e=replaceAll(e,get_instruct_starttag(!0),"%SpcStg%"),e=replaceAll(e,get_instruct_endtag(!0),"%SpcEtg%"),localsettings.instruct_has_markdown&&""==synchro_pending_stream){e=e.replace(/
(\n[-*] .+?)(%SpcStg%)/g,"$1\n$2");let t=(e.match(/```/g)||[]).length;t>0&&t%2!=0&&(e+="```"),e=simpleMarkdown(e)}e=replaceAll(e,"%SpcStg%",'
'),e=replaceAll(e,"%SpcEtg%",'
')}if(3==localsettings.opmode){let n="\n"+localsettings.chatname+": ";var t=new RegExp("\n(?!"+localsettings.chatname+").+?: ","gi");let o={},r=0;e=e.replace(t,(function(e){let t=escapeHtml(e),n=t.trim();return null==o[n]&&(o[n]=GetUniqueColor(r),++r),''+t+""})),e=replaceAll(e,n,''+escapeHtml(n)+"")}2==localsettings.opmode&&(e=e.replace(/\n\n\> .+?\n/g,(function(e){return''+e+""}))),e=e.replace(/\[<\|p\|.+?\|p\|>\]/g,(function(e){let t=e.substring(5,e.length-5);return t=render_image_html("",t),t})),e=e.replace(/\[<\|d\|.+?\|d\|>\]/g,(function(e){let t=e.substring(5,e.length-5);return t=render_image_html(t,""),t})),e=e.replace(/(\r\n|\r|\n)/g,"
"),e.endsWith("
")&&!e.endsWith("

")&&(e=e.slice(0,-4)),document.getElementById("gametext").innerHTML=e}else{if(null==perfdata)document.getElementById("gametext").innerHTML='Welcome to KoboldAI Lite!
You are in Offline Mode.
You will still be able to load and edit stories, but not generate new text.';else{let e="";e=""!=custom_kobold_endpoint?'
You\'re using the custom KoboldAI endpoint at '+custom_kobold_endpoint+"":""!=custom_oai_key?"
You're using the OpenAI API":""!=custom_scale_key?"
You're using the Spellbook by Scale AI API":""!=custom_claude_key?"
You're using the Claude API":'
There are '+selected_models.reduce(((e,t)=>e+t.count),0)+' volunteer(s) running selected models with a total queue length of '+selected_models.reduce(((e,t)=>e+t.queued),0)+" tokens",document.getElementById("gametext").innerHTML='Welcome to KoboldAI Lite!
You are using the models '+selected_models.reduce(((e,t)=>e+(""==e?"":", ")+t.name),"")+""+(0==selected_workers.length?"":" (Pinned to "+selected_workers.length+" worker IDs)")+"."+e+'.

Enter a prompt below to begin!
Or, select a Quick Start Scenario by clicking here.
'}document.getElementById("allowediting").checked&&(document.getElementById("allowediting").checked=!1,toggle_editable())}if(null==perfdata?(document.getElementById("topbtn_reconnect").classList.remove("hidden"),localmode?document.getElementById("topbtn_customendpt").classList.add("hidden"):document.getElementById("topbtn_customendpt").classList.remove("hidden"),document.getElementById("topbtn_ai").classList.add("hidden"),document.getElementById("topbtn_newgame").classList.remove("hidden"),document.getElementById("topbtn_save").classList.remove("hidden"),document.getElementById("topbtn_load").classList.remove("hidden"),document.getElementById("topbtn_settings").classList.add("hidden"),document.getElementById("topbtn_share").classList.add("hidden"),document.getElementById("topbtn_scenarios").classList.add("hidden"),document.getElementById("topbtn_quickplay").classList.add("hidden")):(document.getElementById("topbtn_reconnect").classList.add("hidden"),document.getElementById("topbtn_customendpt").classList.add("hidden"),localmode?document.getElementById("topbtn_ai").classList.add("hidden"):document.getElementById("topbtn_ai").classList.remove("hidden"),0==selected_models.length?(document.getElementById("topbtn_newgame").classList.add("hidden"),document.getElementById("topbtn_save").classList.add("hidden"),document.getElementById("topbtn_load").classList.add("hidden"),document.getElementById("topbtn_settings").classList.add("hidden"),document.getElementById("topbtn_share").classList.add("hidden"),document.getElementById("topbtn_scenarios").classList.add("hidden"),document.getElementById("topbtn_quickplay").classList.remove("hidden")):(document.getElementById("topbtn_newgame").classList.remove("hidden"),document.getElementById("topbtn_save").classList.remove("hidden"),document.getElementById("topbtn_load").classList.remove("hidden"),document.getElementById("topbtn_settings").classList.remove("hidden"),document.getElementById("topbtn_share").classList.remove("hidden"),do
cument.getElementById("topbtn_scenarios").classList.remove("hidden"),document.getElementById("topbtn_quickplay").classList.add("hidden"))),0==selected_models.length?(document.getElementById("btn_actmem").disabled=!0,document.getElementById("btn_actwi").disabled=!0,document.getElementById("btn_actundo").disabled=!0,document.getElementById("btn_actredo").disabled=!0,document.getElementById("btn_actretry").disabled=!0,null==perfdata&&(document.getElementById("btn_actmem").disabled=!1,document.getElementById("btn_actwi").disabled=!1)):(document.getElementById("btn_actmem").disabled=!1,document.getElementById("btn_actwi").disabled=!1,document.getElementById("btn_actundo").disabled=!1,document.getElementById("btn_actredo").disabled=!1,document.getElementById("btn_actretry").disabled=!1),null==perfdata)document.getElementById("btnsend").disabled=!0,document.getElementById("btnsend").classList.add("wait"),document.getElementById("btnsend").classList.remove("btn-primary"),document.getElementById("btnsend").innerHTML="Offline",document.getElementById("fvico").href=favivon_normal;else if(0==selected_models.length&&0==selected_workers.length){document.getElementById("btnsend").disabled=!0,document.getElementById("btnsend").classList.add("wait"),document.getElementById("btnsend").classList.remove("btn-primary"),document.getElementById("btnsend").innerHTML="No AI
Loaded";let e='There are '+perfdata.worker_count+' total volunteer(s) in the KoboldAI Horde, and '+perfdata.queued_requests+' request(s) in queues.
A total of '+perfdata.past_minute_tokens+" tokens were generated in the last minute.

";document.getElementById("gametext").innerHTML='Welcome to KoboldAI Lite!

'+e+'Please select an AI model to use!
',document.getElementById("fvico").href=favivon_normal}else if(""==pending_response_id)document.getElementById("btnsend").disabled=!1,document.getElementById("btnsend").classList.remove("wait"),document.getElementById("btnsend").classList.add("btn-primary"),document.getElementById("btnsend").innerHTML="Submit",document.getElementById("fvico").href=favivon_normal;else{document.getElementById("btnsend").disabled=!0,document.getElementById("btnsend").classList.add("wait"),document.getElementById("btnsend").classList.remove("btn-primary");let e='
';document.getElementById("btnsend").innerHTML!=e&&(document.getElementById("btnsend").innerHTML=e),document.getElementById("fvico").href=favicon_busy}let n=!!document.getElementById("allowediting").checked;localsettings.enhanced_chat_ui&&3==localsettings.opmode&&!n?(0==gametext_arr.length?render_enhanced_chat(document.getElementById("gametext").innerHTML):render_enhanced_chat(concat_gametext(!1,"","","",!0)),document.getElementById("enhancedchatinterface").classList.remove("hidden"),document.getElementById("normalinterface").classList.add("hidden")):(document.getElementById("enhancedchatinterface").classList.add("hidden"),document.getElementById("normalinterface").classList.remove("hidden")),document.getElementById("btnautogenmem").disabled=document.getElementById("btnsend").disabled,localsettings.persist_session&&autosave(),0==e&&localsettings.autoscroll&&(document.getElementById("gametext").scrollTop=document.getElementById("gametext").scrollHeight,document.getElementById("chat_msg_body").scrollTop=document.getElementById("chat_msg_body").scrollHeight),idle_timer=0,document.getElementById("token-budget").innerText=last_token_budget}function render_enhanced_chat(e){var t=document.getElementById("chat_msg_body");if(!t)return;let n="",o=!1;var r=new RegExp("(?!"+localsettings.chatname+").+?: ","gi"),s=new RegExp("\\|[d|p]\\|>(?!"+localsettings.chatname+").+?\\: ","gi");let l=[];e=(e=e.replace(s,(function(e){return e.substring(0,4)+"\n"+e.substring(4)}))).split("\n");localsettings.chatname;for(var a=new RegExp("("+localsettings.chatname+"): ","gi"),i=0;i0?(o=!0,l.push({name:d[0].substring(0,d[0].length-2),msg:t.split(d[0])[1],myturn:o})):null!=c&&c.length>0?(o=!1,l.push({name:c[0].substring(0,c[0].length-2),msg:t.split(c[0])[1],myturn:o})):0==l.length?""!=t.trim()&&l.push({name:"",msg:t,myturn:o}):l[l.length-1].msg+="
"+t)}let m={},u=0;for(i=0;i\]/g,(function(e){let t=e.substring(5,e.length-5);return t=render_image_html("",t,!1),t})),e.msg=e.msg.replace(/\[<\|d\|.+?\|d\|>\]/g,(function(e){let t=e.substring(5,e.length-5);return t=render_image_html(t,"",!1),t})),e.msg=e.msg.replace(/\[<\|.+?\|>\]/g,""),e.msg=e.msg.replace(/\*(\S[^*]+\S)\*/g,"$1")),e.myturn){n+='

'+(""!=e.name?''+escapeHtml(e.name)+"
":"")+e.msg+"

"}else{let t=escapeHtml(e.name),o=t.trim();null==m[o]&&(m[o]=GetUniqueColor(u),++u),n+='

'+(""!=e.name?"'+t+"
":"")+e.msg+"

"}}""!=synchro_pending_stream&&(n+='

'+escapeHtml(pending_context_preinjection)+synchro_pending_stream+"

"),t.innerHTML=n,""==pending_response_id?document.getElementById("chatistyping").classList.add("hidden"):(document.getElementById("chatistyping").classList.remove("hidden"),null!=pending_context_preinjection&&""!=pending_context_preinjection&&pending_context_preinjection.includes(":")?document.getElementById("chataityping").innerText=pending_context_preinjection.split(":")[0]+" is typing...":document.getElementById("chataityping").innerText="The AI is typing..."),document.getElementById("chat_msg_send_btn").disabled=document.getElementById("btnsend").disabled}function chat_handle_typing(e){var t=(e=e||window.event).keyCode||e.which;if(!e.shiftKey&&13==t){let t=!0;document.getElementById("cht_inp").value;t&&(e.preventDefault(),document.getElementById("btnsend").disabled||chat_submit_generation())}}function chat_submit_generation(){document.getElementById("input_text").value=document.getElementById("cht_inp").value,submit_generation(),document.getElementById("cht_inp").value=""}function chat_toggle_actionmenu(){var e=document.getElementById("actionmenu2");e.classList.contains("hidden")?e.classList.remove("hidden"):e.classList.add("hidden")}function autosave(){if(localStorage.setItem((localmode?"e_":"")+"kaihordewebui_settings",JSON.stringify(localsettings)),localsettings.persist_session){let e=generate_compressed_story();localStorage.setItem((localmode?"e_":"")+"kaihordewebui_story",e)}console.log("autosave done")}function btn_adventure_mode(){localsettings.adventure_is_action=!localsettings.adventure_is_action,render_gametext()}function btn_memory(){document.getElementById("memorycontainer").classList.remove("hidden"),document.getElementById("memorytext").value=current_memory,document.getElementById("anotetext").value=current_anote,document.getElementById("anotetemplate").value=current_anotetemplate,document.getElementById("anote_strength").value=anote_strength,document.getElementById("extrastopseq").value=extrastopseq}function toggle_wi_sk(e){var 
t=current_wi[e];t.selective=!t.selective;var n=document.getElementById("wiskt"+e),o=document.getElementById("wikeysec"+e);t.selective?(n.classList.add("witoggleron"),n.classList.remove("witoggleroff"),o.classList.remove("hidden")):(n.classList.remove("witoggleron"),n.classList.add("witoggleroff"),o.classList.add("hidden"))}function toggle_wi_ck(e){var t=current_wi[e];t.constant=!t.constant;var n=document.getElementById("wickt"+e);t.constant?(n.classList.add("witoggleron"),n.classList.remove("witoggleroff")):(n.classList.remove("witoggleron"),n.classList.add("witoggleroff"))}function del_wi(e){save_wi();current_wi[e];current_wi.splice(e,1),btn_wi()}function add_wi(){save_wi();current_wi.push({key:"",keysecondary:"",content:"",comment:"",folder:null,selective:!1,constant:!1}),btn_wi()}function save_wi(){for(var e=0;e';for(var t=0;t\n\t\t\n\t\t\n\t\t\n\t\t\n\t\t\n\t\t\n\t\t\t📑\n\t\t\t📌\n\t\t\t\n\t\t\n\t\t'}0==current_wi.length&&(selectionhtml='
No world info.
Click [+] to add a new entry.
'),selectionhtml+="",e.innerHTML=selectionhtml}var backLongPressTimer=null;function btn_back_longpress_start(){backLongPressTimer=setTimeout((()=>{if(console.log("Clear story"),""==pending_response_id&&gametext_arr.length>0){for(;gametext_arr.length>0;)if(""!=retry_prev_text)redo_prev_text=gametext_arr.pop(),gametext_arr.push(retry_prev_text),retry_prev_text="";else{let e=gametext_arr.pop();redo_arr.push(e)}render_gametext()}}),3e3)}function btn_back_longpress_end(){clearTimeout(backLongPressTimer)}function btn_back(){if(""==pending_response_id&&gametext_arr.length>0){if(""!=retry_prev_text)redo_prev_text=gametext_arr.pop(),gametext_arr.push(retry_prev_text),retry_prev_text="";else{let e=gametext_arr.pop();redo_arr.push(e)}render_gametext()}}var redoLongPressTimer=null;function btn_redo_longpress_start(){redoLongPressTimer=setTimeout((()=>{if(console.log("Redo All story"),""==pending_response_id&&redo_arr.length>0){for(;redo_arr.length>0;){retry_prev_text="";let e=redo_arr.pop();gametext_arr.push(e)}btn_redo(),render_gametext()}}),3e3)}function btn_redo_longpress_end(){clearTimeout(redoLongPressTimer)}function btn_redo(){if(""==pending_response_id)if(redo_arr.length>0){retry_prev_text="";let e=redo_arr.pop();gametext_arr.push(e),render_gametext()}else""!=redo_prev_text&&(retry_prev_text=gametext_arr.pop(),gametext_arr.push(redo_prev_text),redo_prev_text="",render_gametext())}function btn_retry(){if(""==pending_response_id&&gametext_arr.length>1){let e=document.getElementById("input_text").value;document.getElementById("input_text").value="";let t=gametext_arr[gametext_arr.length-1];redo_prev_text="",retry_prev_text="",gametext_arr.pop(),submit_generation(),retry_prev_text=t,redo_arr=[],document.getElementById("input_text").value=e}}function toggleNavWithoutBootstrapJS(){var e=document.getElementById("navbarNavDropdown");e.classList.contains("collapse")?e.classList.remove("collapse"):e.classList.add("collapse")}const 
clamp=(e,t,n)=>Math.min(Math.max(e,t),n),cleannum=function(e,t,n){let o=isNaN(e)?0:e;return clamp(o,t,n)}
diff --git a/koboldcpp.py b/koboldcpp.py index 8b96e8c3ad635..80c3d79be6a15 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -978,7 +978,7 @@ def togglehorde(a,b,c): else: item.grid_forget() labels[idx].grid_forget() - if usehorde_var.get()==1 and horde_name_var.get()=="koboldcpp" and model_var.get()!="": + if usehorde_var.get()==1 and (horde_name_var.get()=="koboldcpp" or horde_name_var.get()=="") and model_var.get()!="": basefile = os.path.basename(model_var.get()) horde_name_var.set(os.path.splitext(basefile)[0]) @@ -1388,6 +1388,7 @@ def make_url_request(url, data, method='POST'): current_id = None current_payload = None current_generation = None + sleepy_counter = 0 #if this exceeds a value, worker becomes sleepy (slower) print("===\nEmbedded Horde Worker '"+worker_name+"' Starting...\n(To use your own KAI Bridge/Scribe worker instead, don't set your API key)") BRIDGE_AGENT = f"KoboldCppEmbedWorker:1:https://github.com/LostRuins/koboldcpp" cluster = "https://horde.koboldai.net" @@ -1424,9 +1425,13 @@ def make_url_request(url, data, method='POST'): time.sleep(5) continue if not pop["id"]: - #print(f"Server {cluster} has no valid generations to do for us.") - time.sleep(3) + slp = (2 if sleepy_counter<10 else (3 if sleepy_counter<20 else 4)) + #print(f"Server {cluster} has no valid generations for us. Sleep for {slp}s") + time.sleep(slp) + sleepy_counter += 1 continue + + sleepy_counter = 0 current_id = pop['id'] current_payload = pop['payload'] print(f"\nJob received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. 
Starting generation...") From 5b2b2dc6ae8086bff7c9b3c17fb435cf319b7185 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 24 Jul 2023 14:46:21 +0300 Subject: [PATCH 4/8] ggml : sync (unary ops refactor, static-correctness) (#2370) * ggml : sync (unary ops, tests) ggml-ci * tests : remove unnecessary funcs --- ggml-cuda.cu | 29 +- ggml-metal.m | 96 +++--- ggml.c | 775 +++++++++++++++++++++------------------------ ggml.h | 60 +++- tests/test-grad0.c | 479 +++++++++++++++++++++++----- tests/test-opt.c | 6 +- 6 files changed, 870 insertions(+), 575 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 6823adc6cc958..b8c98354da192 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -3962,18 +3962,23 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_ } func = ggml_cuda_mul; break; - case GGML_OP_GELU: - if (!any_on_device) { - return false; - } - func = ggml_cuda_gelu; - break; - case GGML_OP_SILU: - if (!any_on_device) { - return false; - } - func = ggml_cuda_silu; - break; + case GGML_OP_UNARY: + switch (ggml_get_unary_op(tensor)) { + case GGML_UNARY_OP_GELU: + if (!any_on_device) { + return false; + } + func = ggml_cuda_gelu; + break; + case GGML_UNARY_OP_SILU: + if (!any_on_device) { + return false; + } + func = ggml_cuda_silu; + break; + default: + return false; + } break; case GGML_OP_NORM: if (!any_on_device) { return false; diff --git a/ggml-metal.m b/ggml-metal.m index bf3f68fe45726..1fd6e857ffe61 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -519,48 +519,56 @@ void ggml_metal_graph_compute( [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; } break; - case GGML_OP_SILU: - { - if (encoder == nil) { - encoder = [command_buffer computeCommandEncoder]; - } - - [encoder setComputePipelineState:ctx->pipeline_silu]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - - const int64_t n = ggml_nelements(dst); - - 
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; - } break; - case GGML_OP_RELU: - { - if (encoder == nil) { - encoder = [command_buffer computeCommandEncoder]; - } - - [encoder setComputePipelineState:ctx->pipeline_relu]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - - const int64_t n = ggml_nelements(dst); - - [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + case GGML_OP_UNARY: + switch (ggml_get_unary_op(gf->nodes[i])) { + case GGML_UNARY_OP_SILU: + { + if (encoder == nil) { + encoder = [command_buffer computeCommandEncoder]; + } + + [encoder setComputePipelineState:ctx->pipeline_silu]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_RELU: + { + if (encoder == nil) { + encoder = [command_buffer computeCommandEncoder]; + } + + [encoder setComputePipelineState:ctx->pipeline_relu]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_GELU: + { + if (encoder == nil) { + encoder = [command_buffer computeCommandEncoder]; + } + + [encoder setComputePipelineState:ctx->pipeline_gelu]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + default: + { + fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, 
ggml_op_name(dst->op)); + GGML_ASSERT(false); + } } break; - case GGML_OP_GELU: - { - if (encoder == nil) { - encoder = [command_buffer computeCommandEncoder]; - } - - [encoder setComputePipelineState:ctx->pipeline_gelu]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - - const int64_t n = ggml_nelements(dst); - - [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; - } break; case GGML_OP_SOFT_MAX: { if (encoder == nil) { @@ -979,8 +987,10 @@ void ggml_metal_graph_compute( [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; } break; default: - fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - GGML_ASSERT(false); + { + fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); + GGML_ASSERT(false); + } } } diff --git a/ggml.c b/ggml.c index 9ee4a8d7f687b..960b8057709a9 100644 --- a/ggml.c +++ b/ggml.c @@ -3440,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; } inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { -#if defined(GGML_SIMD) +#if defined(GGML_USE_ACCELERATE) + vDSP_vsmul(y, 1, &v, y, 1, n); +#elif defined(GGML_SIMD) const int np = (n & ~(GGML_F32_STEP - 1)); GGML_F32_VEC vx = GGML_F32_VEC_SET1(v); @@ -3603,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) { #endif } -inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) { +inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) { sum += (ggml_float)x[i]; @@ -3611,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int 
n, ggml_float * s, const float * x *s = sum; } +inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) { + float sum = 0.0f; + for (int i = 0; i < n; ++i) { + sum += GGML_FP16_TO_FP32(x[i]); + } + *s = sum; +} + inline static void ggml_vec_max_f32(const int n, float * s, const float * x) { #ifndef GGML_USE_ACCELERATE float max = -INFINITY; @@ -3750,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "ARGMAX", "REPEAT", "REPEAT_BACK", - "ABS", - "SGN", - "NEG", - "STEP", - "TANH", - "ELU", - "RELU", - "GELU", - "GELU_QUICK", - "SILU", "SILU_BACK", "NORM", "RMS_NORM", @@ -3798,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "WIN_PART", "WIN_UNPART", + "UNARY", + "MAP_UNARY", "MAP_BINARY", @@ -3809,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS_BACK", }; -static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); +static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -3830,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "argmax(x)", "repeat(x)", "repeat_back(x)", - "abs(x)", - "sgn(x)", - "-x", - "step(x)", - "tanh(x)", - "elu(x)", - "relu(x)", - "gelu(x)", - "gelu_quick(x)", - "silu(x)", "silu_back(x)", "norm(x)", "rms_norm(x)", @@ -3878,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "win_part(x)", "win_unpart(x)", + "unary(x)", + "f(x)", "f(x,y)", @@ -3889,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss_back(x,y)", }; -static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); +static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); @@ -4145,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) { return GGML_OP_NAME[op]; } +const char * ggml_op_symbol(enum ggml_op op) { + return GGML_OP_SYMBOL[op]; +} + size_t ggml_element_size(const struct 
ggml_tensor * tensor) { return GGML_TYPE_SIZE[tensor->type]; } @@ -4443,6 +4441,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) return result; } +bool ggml_get_no_alloc(struct ggml_context * ctx) { + return ctx->no_alloc; +} + void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) { ctx->no_alloc = no_alloc; } @@ -4480,7 +4482,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { // this is an error prone process, but it is necessary to support inplace // operators when using scratch buffers // TODO: implement a better way -void ggml_scratch_save(struct ggml_context * ctx) { +static void ggml_scratch_save(struct ggml_context * ctx) { // this is needed to allow opt tensors to store their data // TODO: again, need to find a better way ctx->no_alloc_save = ctx->no_alloc; @@ -4490,7 +4492,7 @@ void ggml_scratch_save(struct ggml_context * ctx) { ctx->scratch.data = NULL; } -void ggml_scratch_load(struct ggml_context * ctx) { +static void ggml_scratch_load(struct ggml_context * ctx) { ctx->no_alloc = ctx->no_alloc_save; ctx->scratch = ctx->scratch_save; @@ -4498,7 +4500,7 @@ void ggml_scratch_load(struct ggml_context * ctx) { //////////////////////////////////////////////////////////////////////////////// -struct ggml_tensor * ggml_new_tensor_impl( +static struct ggml_tensor * ggml_new_tensor_impl( struct ggml_context * ctx, enum ggml_type type, int n_dims, @@ -4621,6 +4623,21 @@ struct ggml_tensor * ggml_new_tensor_impl( return result; } +static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { + assert(params_size <= GGML_MAX_OP_PARAMS); + memcpy(tensor->op_params, params, params_size); +} + +static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) { + assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); + return ((const int32_t *)(tensor->op_params))[i]; +} + +static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t 
i, int32_t value) { + assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); + ((int32_t *)(tensor->op_params))[i] = value; +} + struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, enum ggml_type type, @@ -4952,6 +4969,16 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) { return (float *)(tensor->data); } +enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) { + GGML_ASSERT(tensor->op == GGML_OP_UNARY); + return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0); +} + +static void ggml_set_unary_op(struct ggml_tensor * tensor, enum ggml_unary_op op) { + GGML_ASSERT(tensor->op = GGML_OP_UNARY); + ggml_set_op_params_i32(tensor, 0, (int32_t) op); +} + const char * ggml_get_name(const struct ggml_tensor * tensor) { return tensor->name; } @@ -4970,11 +4997,6 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * return tensor; } -static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { - assert(params_size <= GGML_MAX_OP_PARAMS); - memcpy(tensor->op_params, params, params_size); -} - struct ggml_tensor * ggml_view_tensor( struct ggml_context * ctx, const struct ggml_tensor * src) { @@ -5010,7 +5032,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam // ggml_dup -struct ggml_tensor * ggml_dup_impl( +static struct ggml_tensor * ggml_dup_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5043,7 +5065,7 @@ struct ggml_tensor * ggml_dup_inplace( // ggml_add -struct ggml_tensor * ggml_add_impl( +static struct ggml_tensor * ggml_add_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5086,7 +5108,7 @@ struct ggml_tensor * ggml_add_inplace( // ggml_add1 -struct ggml_tensor * ggml_add1_impl( +static struct ggml_tensor * ggml_add1_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5126,7 +5148,7 @@ struct ggml_tensor * ggml_add1_inplace( 
// ggml_acc -struct ggml_tensor * ggml_acc_impl( +static struct ggml_tensor * ggml_acc_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5183,7 +5205,7 @@ struct ggml_tensor * ggml_acc_inplace( // ggml_sub -struct ggml_tensor * ggml_sub_impl( +static struct ggml_tensor * ggml_sub_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5222,7 +5244,7 @@ struct ggml_tensor * ggml_sub_inplace( // ggml_mul -struct ggml_tensor * ggml_mul_impl( +static struct ggml_tensor * ggml_mul_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5269,7 +5291,7 @@ struct ggml_tensor * ggml_mul_inplace( // ggml_div -struct ggml_tensor * ggml_div_impl( +static struct ggml_tensor * ggml_div_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5312,7 +5334,7 @@ struct ggml_tensor * ggml_div_inplace( // ggml_sqr -struct ggml_tensor * ggml_sqr_impl( +static struct ggml_tensor * ggml_sqr_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5345,7 +5367,7 @@ struct ggml_tensor * ggml_sqr_inplace( // ggml_sqrt -struct ggml_tensor * ggml_sqrt_impl( +static struct ggml_tensor * ggml_sqrt_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5379,7 +5401,7 @@ struct ggml_tensor * ggml_sqrt_inplace( // ggml_log -struct ggml_tensor * ggml_log_impl( +static struct ggml_tensor * ggml_log_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5559,333 +5581,142 @@ struct ggml_tensor * ggml_repeat_back( // ggml_abs -struct ggml_tensor * ggml_abs_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_ABS; - result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_abs( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_abs_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_ABS); } struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_abs_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); } - // ggml_sgn -struct ggml_tensor * ggml_sgn_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_SGN; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_sgn_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_SGN); } struct ggml_tensor * ggml_sgn_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_sgn_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN); } // ggml_neg -struct ggml_tensor * ggml_neg_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_NEG; - result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_neg( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_neg_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_NEG); } struct ggml_tensor * ggml_neg_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_neg_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG); } // ggml_step -struct ggml_tensor * ggml_step_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_STEP; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_step( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_step_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_STEP); } struct ggml_tensor * ggml_step_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_step_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP); } // ggml_tanh -struct ggml_tensor * ggml_tanh_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_TANH; - result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_tanh( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_tanh_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_TANH); } struct ggml_tensor * ggml_tanh_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_tanh_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH); } // ggml_elu -struct ggml_tensor * ggml_elu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_ELU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_elu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_elu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_ELU); } struct ggml_tensor * ggml_elu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_elu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU); } // ggml_relu -struct ggml_tensor * ggml_relu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_RELU; - result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_relu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_relu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_RELU); } struct ggml_tensor * ggml_relu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_relu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU); } // ggml_gelu -struct ggml_tensor * ggml_gelu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_GELU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_gelu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_GELU); } struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU); } // ggml_gelu_quick -struct ggml_tensor * ggml_gelu_quick_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_GELU_QUICK; - result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_quick_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK); } struct ggml_tensor * ggml_gelu_quick_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_quick_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK); } // ggml_silu -struct ggml_tensor * ggml_silu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_SILU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_silu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_silu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_SILU); } struct ggml_tensor * ggml_silu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_silu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU); } // ggml_silu_back @@ -5913,7 +5744,7 @@ struct ggml_tensor * ggml_silu_back( // ggml_norm -struct ggml_tensor * ggml_norm_impl( +static struct ggml_tensor * ggml_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5947,7 +5778,7 @@ struct ggml_tensor * ggml_norm_inplace( return ggml_norm_impl(ctx, a, true); } -struct ggml_tensor * ggml_rms_norm_impl( +static struct ggml_tensor * ggml_rms_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -6056,7 +5887,7 @@ struct ggml_tensor * ggml_out_prod( // ggml_scale -struct ggml_tensor * ggml_scale_impl( +static struct ggml_tensor * ggml_scale_impl( struct ggml_context * ctx, struct ggml_tensor * a, 
struct ggml_tensor * b, @@ -6096,7 +5927,7 @@ struct ggml_tensor * ggml_scale_inplace( // ggml_set -struct ggml_tensor * ggml_set_impl( +static struct ggml_tensor * ggml_set_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6186,7 +6017,7 @@ struct ggml_tensor * ggml_set_2d_inplace( // ggml_cpy -struct ggml_tensor * ggml_cpy_impl( +static struct ggml_tensor * ggml_cpy_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6231,7 +6062,7 @@ struct ggml_tensor * ggml_cpy_inplace( // ggml_cont -struct ggml_tensor * ggml_cont_impl( +static struct ggml_tensor * ggml_cont_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -6701,7 +6532,7 @@ struct ggml_tensor * ggml_diag( // ggml_diag_mask_inf -struct ggml_tensor * ggml_diag_mask_inf_impl( +static struct ggml_tensor * ggml_diag_mask_inf_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, @@ -6741,7 +6572,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace( // ggml_diag_mask_zero -struct ggml_tensor * ggml_diag_mask_zero_impl( +static struct ggml_tensor * ggml_diag_mask_zero_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, @@ -6780,7 +6611,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace( // ggml_soft_max -struct ggml_tensor * ggml_soft_max_impl( +static struct ggml_tensor * ggml_soft_max_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -6814,7 +6645,7 @@ struct ggml_tensor * ggml_soft_max_inplace( // ggml_soft_max_back -struct ggml_tensor * ggml_soft_max_back_impl( +static struct ggml_tensor * ggml_soft_max_back_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6851,7 +6682,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace( // ggml_rope -struct ggml_tensor * ggml_rope_impl( +static struct ggml_tensor * ggml_rope_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, @@ -7363,9 +7194,47 @@ struct ggml_tensor * 
ggml_win_unpart( return result; } +// ggml_unary + +static struct ggml_tensor * ggml_unary_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_set_unary_op(result, op); + + result->op = GGML_OP_UNARY; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + + return result; +} + +struct ggml_tensor * ggml_unary( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op) { + return ggml_unary_impl(ctx, a, op, false); +} + +struct ggml_tensor * ggml_unary_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op) { + return ggml_unary_impl(ctx, a, op, true); +} + // ggml_map_unary -struct ggml_tensor * ggml_map_unary_impl_f32( +static struct ggml_tensor * ggml_map_unary_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_unary_op_f32_t fun, @@ -7403,7 +7272,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32( // ggml_map_binary -struct ggml_tensor * ggml_map_binary_impl_f32( +static struct ggml_tensor * ggml_map_binary_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -7447,7 +7316,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32( // ggml_map_custom1 -struct ggml_tensor * ggml_map_custom1_impl_f32( +static struct ggml_tensor * ggml_map_custom1_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_custom1_op_f32_t fun, @@ -7485,7 +7354,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32( // ggml_map_custom2 -struct ggml_tensor * ggml_map_custom2_impl_f32( +static struct ggml_tensor * ggml_map_custom2_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -7527,7 +7396,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32( //
ggml_map_custom3 -struct ggml_tensor * ggml_map_custom3_impl_f32( +static struct ggml_tensor * ggml_map_custom3_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -9292,7 +9161,7 @@ static void ggml_compute_forward_sum_f32( for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { - ggml_vec_sum_ggf(ne00, + ggml_vec_sum_f32_ggf(ne00, &row_sum, (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03)); sum += row_sum; @@ -9302,6 +9171,38 @@ static void ggml_compute_forward_sum_f32( ((float *) dst->data)[0] = sum; } +static void ggml_compute_forward_sum_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_is_scalar(dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + assert(src0->nb[0] == sizeof(ggml_fp16_t)); + + GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); + GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); + + float sum = 0; + float row_sum = 0; + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + ggml_vec_sum_f16_ggf(ne00, + &row_sum, + (ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03)); + sum += row_sum; + } + } + } + ((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum); +} + static void ggml_compute_forward_sum( const struct ggml_compute_params * params, const struct ggml_tensor * src0, @@ -9311,6 +9212,10 @@ static void ggml_compute_forward_sum( { ggml_compute_forward_sum_f32(params, src0, dst); } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_sum_f16(params, src0, dst); + } break; default: { GGML_ASSERT(false); @@ -10077,7 +9982,6 @@ static void ggml_compute_forward_silu( } } - // ggml_compute_forward_silu_back static void ggml_compute_forward_silu_back_f32( @@ -14122,6 
+14026,62 @@ static void ggml_compute_forward_win_unpart( } } +// ggml_compute_forward_unary + +static void ggml_compute_forward_unary( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + const enum ggml_unary_op op = ggml_get_unary_op(dst); + + switch (op) { + case GGML_UNARY_OP_ABS: + { + ggml_compute_forward_abs(params, src0, dst); + } break; + case GGML_UNARY_OP_SGN: + { + ggml_compute_forward_sgn(params, src0, dst); + } break; + case GGML_UNARY_OP_NEG: + { + ggml_compute_forward_neg(params, src0, dst); + } break; + case GGML_UNARY_OP_STEP: + { + ggml_compute_forward_step(params, src0, dst); + } break; + case GGML_UNARY_OP_TANH: + { + ggml_compute_forward_tanh(params, src0, dst); + } break; + case GGML_UNARY_OP_ELU: + { + ggml_compute_forward_elu(params, src0, dst); + } break; + case GGML_UNARY_OP_RELU: + { + ggml_compute_forward_relu(params, src0, dst); + } break; + case GGML_UNARY_OP_GELU: + { + ggml_compute_forward_gelu(params, src0, dst); + } break; + case GGML_UNARY_OP_GELU_QUICK: + { + ggml_compute_forward_gelu_quick(params, src0, dst); + } break; + case GGML_UNARY_OP_SILU: + { + ggml_compute_forward_silu(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + // ggml_compute_forward_map_unary static void ggml_compute_forward_map_unary_f32( @@ -14682,46 +14642,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_repeat_back(params, tensor->src[0], tensor); } break; - case GGML_OP_ABS: - { - ggml_compute_forward_abs(params, tensor->src[0], tensor); - } break; - case GGML_OP_SGN: - { - ggml_compute_forward_sgn(params, tensor->src[0], tensor); - } break; - case GGML_OP_NEG: - { - ggml_compute_forward_neg(params, tensor->src[0], tensor); - } break; - case GGML_OP_STEP: - { - ggml_compute_forward_step(params, tensor->src[0], tensor); - } break; - case GGML_OP_TANH: - { - ggml_compute_forward_tanh(params,
tensor->src[0], tensor); - } break; - case GGML_OP_ELU: - { - ggml_compute_forward_elu(params, tensor->src[0], tensor); - } break; - case GGML_OP_RELU: - { - ggml_compute_forward_relu(params, tensor->src[0], tensor); - } break; - case GGML_OP_GELU: - { - ggml_compute_forward_gelu(params, tensor->src[0], tensor); - } break; - case GGML_OP_GELU_QUICK: - { - ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor); - } break; - case GGML_OP_SILU: - { - ggml_compute_forward_silu(params, tensor->src[0], tensor); - } break; case GGML_OP_SILU_BACK: { ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor); @@ -14864,6 +14784,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_win_unpart(params, tensor->src[0], tensor); } break; + case GGML_OP_UNARY: + { + ggml_compute_forward_unary(params, tensor->src[0], tensor); + } break; case GGML_OP_MAP_UNARY: { ggml_unary_op_f32_t fun; @@ -15112,73 +15036,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor inplace); } } break; - case GGML_OP_ABS: - { - if (src0->grad) { - src0->grad = - ggml_add_impl(ctx, - src0->grad, - ggml_mul(ctx, - ggml_sgn(ctx, src0), - tensor->grad), - inplace); - } - } break; - case GGML_OP_SGN: - { - if (src0->grad) { - // noop - } - } break; - case GGML_OP_NEG: - { - if (src0->grad) { - src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace); - } - } break; - case GGML_OP_STEP: - { - if (src0->grad) { - // noop - } - } break; - case GGML_OP_TANH: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_ELU: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_RELU: - { - if (src0->grad) { - src0->grad = ggml_sub_impl(ctx, - src0->grad, - ggml_mul(ctx, - ggml_step(ctx, src0), - tensor->grad), - inplace); - } - } break; - case GGML_OP_GELU: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_GELU_QUICK: - { - 
GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_SILU: - { - // necessary for llama - if (src0->grad) { - src0->grad = ggml_add_impl(ctx, - src0->grad, - ggml_silu_back(ctx, src0, tensor->grad), - inplace); - } - } break; case GGML_OP_SILU_BACK: { GGML_ASSERT(false); // TODO: not implemented @@ -15440,9 +15297,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_DIAG_MASK_ZERO: { @@ -15454,9 +15308,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_SOFT_MAX: { @@ -15491,9 +15342,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor n_ctx), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_ROPE_BACK: { @@ -15512,9 +15360,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor n_ctx), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_ALIBI: { @@ -15707,6 +15552,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_WIN_PART: case GGML_OP_WIN_UNPART: + case GGML_OP_UNARY: + { + switch (ggml_get_unary_op(tensor)) { + case GGML_UNARY_OP_ABS: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_mul(ctx, + ggml_sgn(ctx, src0), + tensor->grad), + inplace); + } + } break; + case GGML_UNARY_OP_SGN: + { + if (src0->grad) { + // noop + } + } break; + case GGML_UNARY_OP_NEG: + { + if (src0->grad) { + src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace); + } + } break; + case GGML_UNARY_OP_STEP: + { + if (src0->grad) { + // noop + } + } break; + case GGML_UNARY_OP_TANH: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case 
GGML_UNARY_OP_ELU: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_RELU: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_mul(ctx, + ggml_step(ctx, src0), + tensor->grad), + inplace); + } + } break; + case GGML_UNARY_OP_GELU: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_GELU_QUICK: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_SILU: + { + // necessary for llama + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_silu_back(ctx, src0, tensor->grad), + inplace); + } + } break; + default: + GGML_ASSERT(false); + } + } break; case GGML_OP_MAP_UNARY: case GGML_OP_MAP_BINARY: case GGML_OP_MAP_CUSTOM1: @@ -15937,7 +15856,7 @@ typedef pthread_t ggml_thread_t; // Android's libc implementation "bionic" does not support setting affinity #if defined(__linux__) && !defined(__BIONIC__) -void set_numa_thread_affinity(int thread_n, int n_threads) { +static void set_numa_thread_affinity(int thread_n, int n_threads) { if (!ggml_is_numa()) { return; } @@ -15962,7 +15881,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) { CPU_FREE(cpus); } -void clear_numa_thread_affinity(void) { +static void clear_numa_thread_affinity(void) { if (!ggml_is_numa()) { return; } @@ -15986,8 +15905,8 @@ void clear_numa_thread_affinity(void) { #else // TODO: Windows etc. 
// (the linux implementation may also work on BSD, someone should test) -void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); } -void clear_numa_thread_affinity(void) {} +static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); } +static void clear_numa_thread_affinity(void) {} #endif struct ggml_compute_state_shared { @@ -16199,21 +16118,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { case GGML_OP_ARGMAX: case GGML_OP_REPEAT: case GGML_OP_REPEAT_BACK: - case GGML_OP_ABS: - case GGML_OP_SGN: - case GGML_OP_NEG: - case GGML_OP_STEP: - case GGML_OP_TANH: - case GGML_OP_ELU: - case GGML_OP_RELU: - { + { n_tasks = 1; } break; - case GGML_OP_MUL: - case GGML_OP_GELU: - case GGML_OP_GELU_QUICK: - case GGML_OP_SILU: + + case GGML_OP_UNARY: + { + switch (ggml_get_unary_op(node)) { + case GGML_UNARY_OP_ABS: + case GGML_UNARY_OP_SGN: + case GGML_UNARY_OP_NEG: + case GGML_UNARY_OP_STEP: + case GGML_UNARY_OP_TANH: + case GGML_UNARY_OP_ELU: + case GGML_UNARY_OP_RELU: + { + n_tasks = 1; + } break; + + case GGML_UNARY_OP_GELU: + case GGML_UNARY_OP_GELU_QUICK: + case GGML_UNARY_OP_SILU: + { + n_tasks = n_threads; + } break; + } + } break; case GGML_OP_SILU_BACK: + case GGML_OP_MUL: case GGML_OP_NORM: case GGML_OP_RMS_NORM: case GGML_OP_RMS_NORM_BACK: @@ -16728,7 +16660,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { fwrite(&nb, sizeof(uint64_t), 1, fout); } - fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout); // dump the data // TODO: pad this to 32 byte boundary @@ -16761,7 +16694,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { fwrite(&nb, sizeof(uint64_t), 1, fout); } - fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + 
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout); // output the op arguments { @@ -16942,7 +16876,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** tensor->op = (enum ggml_op) op; - memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME; + memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME; + memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS; tensor->data = (void *) ptr; @@ -16987,7 +16922,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** nb[j] = nb_cur; } - const char * ptr_name = ptr; ptr += GGML_MAX_NAME; + const char * ptr_name = ptr; ptr += GGML_MAX_NAME; + const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS; const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t); @@ -17024,8 +16960,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** { tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0); - uint64_t offs; - memcpy(&offs, tensor->op_params, sizeof(offs)); + size_t offs; + memcpy(&offs, ptr_op_params, sizeof(offs)); tensor->data = ((char *) tensor->data) + offs; } break; @@ -17045,7 +16981,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** } break; } - memcpy(tensor->name, ptr_name, GGML_MAX_NAME); + memcpy(tensor->name, ptr_name, GGML_MAX_NAME); + memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS); for (int j = 0; j < GGML_MAX_DIMS; ++j) { tensor->nb[j] = nb[j]; @@ -17079,7 +17016,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n", i, node->ne[0], node->ne[1], node->ne[2], - GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? 
"g" : " ", node->perf_runs, + ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs, (double) node->perf_cycles / (double) ggml_cycles_per_ms(), (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs, (double) node->perf_time_us / 1000.0, @@ -17093,7 +17030,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n", i, node->ne[0], node->ne[1], - GGML_OP_NAME[node->op]); + ggml_op_name(node->op)); } for (int i = 0; i < GGML_OP_COUNT; i++) { @@ -17101,7 +17038,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { continue; } - GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0); + GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0); } GGML_PRINT("========================================\n"); @@ -17195,13 +17132,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph } if (node->n_dims == 2) { - fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], GGML_OP_SYMBOL[node->op]); + fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op)); } else { - fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], node->ne[2], GGML_OP_SYMBOL[node->op]); + fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op)); } if (node->grad) { - fprintf(fp, " | %s\"; ]\n", GGML_OP_SYMBOL[node->grad->op]); + fprintf(fp, " | %s\"; ]\n", ggml_op_symbol(node->grad->op)); } else { fprintf(fp, "\"; ]\n"); } diff --git a/ggml.h b/ggml.h index 871c85a89aae7..de44fba9e0961 100644 --- a/ggml.h +++ b/ggml.h @@ -330,16 +330,6 @@ extern "C" { GGML_OP_ARGMAX, GGML_OP_REPEAT, GGML_OP_REPEAT_BACK, - GGML_OP_ABS, - GGML_OP_SGN, - GGML_OP_NEG, - 
GGML_OP_STEP, - GGML_OP_TANH, - GGML_OP_ELU, - GGML_OP_RELU, - GGML_OP_GELU, - GGML_OP_GELU_QUICK, - GGML_OP_SILU, GGML_OP_SILU_BACK, GGML_OP_NORM, // normalize GGML_OP_RMS_NORM, @@ -378,6 +368,8 @@ extern "C" { GGML_OP_WIN_PART, GGML_OP_WIN_UNPART, + GGML_OP_UNARY, + GGML_OP_MAP_UNARY, GGML_OP_MAP_BINARY, @@ -391,6 +383,18 @@ extern "C" { GGML_OP_COUNT, }; + enum ggml_unary_op { + GGML_UNARY_OP_ABS, + GGML_UNARY_OP_SGN, + GGML_UNARY_OP_NEG, + GGML_UNARY_OP_STEP, + GGML_UNARY_OP_TANH, + GGML_UNARY_OP_ELU, + GGML_UNARY_OP_RELU, + GGML_UNARY_OP_GELU, + GGML_UNARY_OP_GELU_QUICK, + GGML_UNARY_OP_SILU, + }; // ggml object struct ggml_object { @@ -535,6 +539,7 @@ extern "C" { GGML_API const char * ggml_type_name(enum ggml_type type); GGML_API const char * ggml_op_name (enum ggml_op op); + GGML_API const char * ggml_op_symbol(enum ggml_op op); GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor); @@ -558,6 +563,7 @@ extern "C" { GGML_API size_t ggml_used_mem(const struct ggml_context * ctx); GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch); + GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx); GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc); GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx); @@ -617,9 +623,11 @@ extern "C" { GGML_API void * ggml_get_data (const struct ggml_tensor * tensor); GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor); - GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor); - GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name); - GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...); + GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor); + + GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor); + GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, 
const char * name); + GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...); // // operations on tensors with backpropagation @@ -629,6 +637,11 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_dup_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_add( struct ggml_context * ctx, struct ggml_tensor * a, @@ -952,11 +965,22 @@ extern "C" { struct ggml_tensor * a, struct ggml_tensor * b); + // a -> b, in-place, return view(b) + GGML_API struct ggml_tensor * ggml_cpy_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + // make contiguous GGML_API struct ggml_tensor * ggml_cont( struct ggml_context * ctx, struct ggml_tensor * a); + // make contiguous, in-place + GGML_API struct ggml_tensor * ggml_cont_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + // return view(a), b specifies the new shape // TODO: when we start computing gradient, make a copy instead of view GGML_API struct ggml_tensor * ggml_reshape( @@ -1268,6 +1292,16 @@ extern "C" { typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); + GGML_API struct ggml_tensor * ggml_unary( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op); + + GGML_API struct ggml_tensor * ggml_unary_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op); + GGML_API struct ggml_tensor * ggml_map_unary_f32( struct ggml_context * ctx, struct ggml_tensor * a, diff --git a/tests/test-grad0.c b/tests/test-grad0.c index 01467bc184372..ef20bce516662 100644 --- a/tests/test-grad0.c +++ b/tests/test-grad0.c @@ -64,7 +64,7 @@ void 
get_random_dims(int64_t * dims, int ndims) { } } -struct ggml_tensor * get_random_tensor( +struct ggml_tensor * get_random_tensor_f32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor( return result; } -struct ggml_tensor * get_random_tensor_int( +struct ggml_tensor * get_random_tensor_f16( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + float fmin, + float fmax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +struct ggml_tensor * get_random_tensor_i32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -160,23 +208,6 @@ struct ggml_tensor * get_random_tensor_int( return result; } -float get_element(const struct ggml_tensor * t, int idx) { - if (t->type == GGML_TYPE_F32) { - return ((float *)t->data)[idx]; - } - - if (t->type == GGML_TYPE_I32) { - return ((int32_t *)t->data)[idx]; - } - - assert(false); - return INFINITY; -} - -void 
set_element(struct ggml_tensor * t, int idx, float value) { - ((float *)t->data)[idx] = value; -} - void print_elements(const char* label, const struct ggml_tensor * t) { if (!t) { printf("%s: %s = null\n", __func__, label); @@ -186,7 +217,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) { printf("%s: %s = [", __func__, label); for (int k = 0; k < nelements; ++k) { if (k > 0) { printf(", "); } - printf("%.5f", get_element(t, k)); + printf("%.5f", ggml_get_f32_1d(t, k)); } printf("] shape: ["); for (int k = 0; k < t->n_dims; ++k) { @@ -237,23 +268,23 @@ bool check_gradient( const int nelements = ggml_nelements(x[i]); for (int k = 0; k < nelements; ++k) { // compute gradient using finite differences - const float x0 = get_element(x[i], k); + const float x0 = ggml_get_f32_1d(x[i], k); const float xm = x0 - eps; const float xp = x0 + eps; - set_element(x[i], k, xp); + ggml_set_f32_1d(x[i], k, xp); ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); const float f0 = ggml_get_f32_1d(f, 0); - set_element(x[i], k, xm); + ggml_set_f32_1d(x[i], k, xm); ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); const float f1 = ggml_get_f32_1d(f, 0); const float g0 = (f0 - f1)/(2.0f*eps); - set_element(x[i], k, x0); + ggml_set_f32_1d(x[i], k, x0); // compute gradient using backward graph ggml_graph_reset (&gf); @@ -261,7 +292,7 @@ bool check_gradient( ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); - const float g1 = get_element(x[i]->grad, k); + const float g1 = ggml_get_f32_1d(x[i]->grad, k); const float error_abs = fabsf(g0 - g1); const float error_rel = g0 != 0 ? 
fabsf(g0 - g1)/fabsf(g0) : 0; @@ -392,19 +423,35 @@ int main(int argc, const char ** argv) { struct ggml_tensor * x[MAX_NARGS]; - // add + // add f32 { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + } + } + + // add f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); + + check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); } } @@ -414,7 +461,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -430,7 +477,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -446,7 +493,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -462,7 +509,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) 
{ - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -478,7 +525,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -494,7 +541,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -510,7 +557,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -527,7 +574,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -537,6 +584,40 @@ int main(int argc, const char ** argv) { } } + // mean, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); + + check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // argmax + if (0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + 
ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); + + check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // repeat { int64_t ne2[4]; @@ -549,15 +630,36 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 2; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); } + } + + // repeat back + { + int64_t ne2[4]; + get_random_dims(ne2, 4); + + ne2[0] = ne[0] * ne2[0]; + ne2[1] = ne[1] * ne2[1]; + ne2[2] = 1; + ne2[3] = 1; + + const int nargs = 1; + for (int ndims = 1; ndims <= 2; ++ndims) { + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0])))); + check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } } // abs (finite differences do not work) @@ -566,7 +668,7 @@ int main(int argc, const char ** argv) { // for (int ndims = 1; ndims <= 2; ++ndims) { // for (int i = 0; i < nargs; ++i) { - // x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); // ggml_set_param(ctx0, x[i]); // } @@ -576,17 +678,82 @@ int main(int argc, const char ** argv) { // } //} + // sgn + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + 
ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); + + check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // neg + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); + + check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // step + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); + + check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // tanh, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); + + check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // mul_mat { const int nargs = 2; for (int ndims = 2; ndims <= 2; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); { int64_t ne2[4]; get_random_dims(ne2, 4); ne2[0] = ne[0]; - x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); } ggml_set_param(ctx0, x[0]); @@ -602,13 +769,63 @@ int main(int argc, const char ** argv) { } } + // elu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 
-1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); + + check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // relu + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); + + check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // gelu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); + + check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // silu { const int nargs = 1; for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -629,7 +846,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -647,8 +864,8 @@ int main(int argc, const char ** argv) { ne2[0] = 1; for (int ndims = 1; ndims <= 2; ++ndims) { - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); ggml_set_param(ctx0, x[1]); @@ -659,20 +876,37 @@ int main(int argc, const char ** argv) { } } - // cpy 
+ // cpy f32 { const int nargs = 2; for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); - check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // cpy f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); + + check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); } } @@ -689,8 +923,8 @@ int main(int argc, const char ** argv) { for (int i = 0; i < ndims; ++i) { ne2[0] *= ne[i]; } - x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -712,8 +946,8 @@ int main(int argc, const char ** argv) { for (int i = 0; i < ndims; ++i) { ne2[0] *= ne[i]; } - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -729,7 +963,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, 
ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 1); @@ -737,7 +971,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 1); } - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); @@ -758,7 +992,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 2; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 2); @@ -766,7 +1000,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 2); } - x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -790,7 +1024,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 3; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 3); @@ -798,7 +1032,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 3); } - x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -824,7 +1058,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 4; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 4); @@ -832,7 +1066,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); } - x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[1] = 
get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -858,7 +1092,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 1); @@ -866,7 +1100,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 1); } - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); @@ -887,7 +1121,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 2; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 2); @@ -895,7 +1129,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 2); } - x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -915,7 +1149,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -941,7 +1175,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); get_random_dims(ne2, 2); while (ne2[0]*ne2[1] > ggml_nelements(x[0])) { @@ -971,7 +1205,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for 
(int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); get_random_dims(ne2, 3); while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) { @@ -1010,7 +1244,7 @@ int main(int argc, const char ** argv) { for (int i=ndims; i<4; ++i) { ne2[i] = 1; } - x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1043,7 +1277,7 @@ int main(int argc, const char ** argv) { for (int i=ndims; i<4; ++i) { ne2[i] = 1; } - x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1060,8 +1294,8 @@ int main(int argc, const char ** argv) { int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1}; const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]); ggml_set_param(ctx0, x[0]); @@ -1075,7 +1309,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); int n_past = irand(ne[0]); @@ -1090,7 +1324,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); int n_past = irand(ne[0]); @@ -1108,7 +1342,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); for (int ndims = 1; ndims <= 3; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct 
ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0])); @@ -1125,8 +1359,8 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); for (int ndims = 1; ndims <= 3; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1])); @@ -1136,7 +1370,7 @@ int main(int argc, const char ** argv) { } } - // rope + // rope f32 { const int nargs = 1; @@ -1148,7 +1382,7 @@ int main(int argc, const char ** argv) { for (int ndims = 3; ndims <= 4; ++ndims) { for (int mode = 0; mode < 4; ++mode) { for (int n_past = 1; n_past < ne2[2]; ++n_past) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1163,14 +1397,89 @@ int main(int argc, const char ** argv) { struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); - GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); - check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); + GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); + check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); + } + } + } + } + + // rope f16 + { + const int nargs = 1; + + int64_t ne2[4]; + get_random_dims(ne2, 4); + ne2[0] += ne2[0] % 2; + int n_rot = ne2[0]; + + for (int ndims = 3; ndims <= 4; ++ndims) { + for (int mode = 0; mode < 4; ++mode) { + for (int n_past = 1; n_past < ne2[2]; ++n_past) { + x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f); + + ggml_set_param(ctx0, x[0]); + + const bool skip_past = (mode & 1); + if (skip_past) { + // we have no past, so this would have to work on uninitialized 
memory. + // we only test the gradients here; + // skip_past should have no influence on gradient computation. + // so when other modes work, we assume that this does as well. + continue; + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); + + GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); + check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); + } + } + } + } + + // flash_attn f32 + { + const int nargs = 3; + + int64_t ne2[4]; + + get_random_dims(ne2, 4); + int64_t D = ne2[0]; + int64_t N = ne2[1]; + int64_t M = ne2[2] + N; + int64_t B = ne2[3]; + + for (int masked = 0; masked <= 1; ++masked) { + for (int ndims = 2; ndims <= 4; ++ndims) { + int64_t neq[4] = { D, N, B, ne[3] }; + int64_t nek[4] = { D, M, B, ne[3] }; + int64_t nev[4] = { M, D, B, ne[3] }; + if (ndims == 2) { + neq[2] = 1; neq[3] = 1; + nek[2] = 1; nek[3] = 1; + nev[2] = 1; nev[3] = 1; + } else if (ndims == 3) { + neq[3] = 1; + nek[3] = 1; + nev[3] = 1; } + x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f); + x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f); + x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f); + ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); + ggml_set_param(ctx0, x[2]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); + + check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); } } } - // flash_attn + // flash_attn f16, not yet fully implemented + if(0) { const int nargs = 3; @@ -1196,16 +1505,16 @@ int main(int argc, const char ** argv) { nek[3] = 1; nev[3] = 1; } - x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f); - x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f); - x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f); + x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f); + x[1] = 
get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f); + x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f); ggml_set_param(ctx0, x[0]); ggml_set_param(ctx0, x[1]); ggml_set_param(ctx0, x[2]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); - check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); + check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); } } } diff --git a/tests/test-opt.c b/tests/test-opt.c index 5531814c48c99..4eef62bcfb96b 100644 --- a/tests/test-opt.c +++ b/tests/test-opt.c @@ -125,9 +125,9 @@ int main(void) { }; struct ggml_context * ctx = ggml_init(params); - int64_t ne1[4] = {4, 1024, 1, 1}; - int64_t ne2[4] = {4, 2048, 1, 1};; - int64_t ne3[4] = {1024, 2048, 1, 1}; + int64_t ne1[4] = {4, 128, 1, 1}; + int64_t ne2[4] = {4, 256, 1, 1};; + int64_t ne3[4] = {128, 256, 1, 1}; struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1); From 6d71e100feef2508d94f28d3cca7ca94407ba685 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 24 Jul 2023 20:33:17 +0800 Subject: [PATCH 5/8] buff buffers --- llama.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/llama.cpp b/llama.cpp index f69a0dab56c4b..ddc84ebd64ec8 100644 --- a/llama.cpp +++ b/llama.cpp @@ -106,12 +106,12 @@ static void ggml_graph_compute_helper(std::vector & buf, ggml_cgraph * static const std::map & MEM_REQ_SCRATCH0(int n_ctx) { static std::map k_sizes = { - { MODEL_3B, ((size_t) n_ctx / 16ull + 92ull) * MB }, - { MODEL_7B, ((size_t) n_ctx / 16ull + 100ull) * MB }, - { MODEL_13B, ((size_t) n_ctx / 12ull + 120ull) * MB }, - { MODEL_30B, ((size_t) n_ctx / 9ull + 160ull) * MB }, - { MODEL_65B, ((size_t) n_ctx / 6ull + 256ull) * MB }, // guess - { MODEL_70B, ((size_t) n_ctx / 7ull + 164ull) * MB }, + 
{ MODEL_3B, ((size_t) n_ctx / 16ull + 156ull) * MB }, + { MODEL_7B, ((size_t) n_ctx / 16ull + 164ull) * MB }, + { MODEL_13B, ((size_t) n_ctx / 12ull + 184ull) * MB }, + { MODEL_30B, ((size_t) n_ctx / 9ull + 224ull) * MB }, + { MODEL_65B, ((size_t) n_ctx / 6ull + 320ull) * MB }, // guess + { MODEL_70B, ((size_t) n_ctx / 6ull + 320ull) * MB }, }; return k_sizes; } @@ -119,12 +119,12 @@ static const std::map & MEM_REQ_SCRATCH0(int n_ctx) static const std::map & MEM_REQ_SCRATCH1() { static std::map k_sizes = { - { MODEL_3B, 128ull * MB }, - { MODEL_7B, 160ull * MB }, - { MODEL_13B, 192ull * MB }, - { MODEL_30B, 256ull * MB }, - { MODEL_65B, 384ull * MB }, // guess - { MODEL_70B, 304ull * MB }, + { MODEL_3B, 192ull * MB }, + { MODEL_7B, 224ull * MB }, + { MODEL_13B, 256ull * MB }, + { MODEL_30B, 320ull * MB }, + { MODEL_65B, 448ull * MB }, // guess + { MODEL_70B, 448ull * MB }, }; return k_sizes; } @@ -133,12 +133,12 @@ static const std::map & MEM_REQ_SCRATCH1() static const std::map & MEM_REQ_EVAL() { static std::map k_sizes = { - { MODEL_3B, 8ull * MB }, - { MODEL_7B, 10ull * MB }, - { MODEL_13B, 12ull * MB }, - { MODEL_30B, 16ull * MB }, - { MODEL_65B, 24ull * MB }, // guess - { MODEL_70B, 24ull * MB }, + { MODEL_3B, 16ull * MB }, + { MODEL_7B, 20ull * MB }, + { MODEL_13B, 24ull * MB }, + { MODEL_30B, 32ull * MB }, + { MODEL_65B, 48ull * MB }, // guess + { MODEL_70B, 48ull * MB }, }; return k_sizes; } From 7555dae4ccbeffaecae9a094214772b5d89d3043 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 24 Jul 2023 22:40:36 +0800 Subject: [PATCH 6/8] ditch advanced subparsers --- koboldcpp.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 80c3d79be6a15..22375c1654a08 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -212,7 +212,7 @@ def load_model(model_filename): os.environ["CUDA_VISIBLE_DEVICES"] = "2" for n in range(tensor_split_max): - if 
args.has_advanced=='advanced' and args.tensor_split and n < len(args.tensor_split): + if args.tensor_split and n < len(args.tensor_split): inputs.tensor_split[n] = float(args.tensor_split[n]) else: inputs.tensor_split[n] = 0 @@ -1648,11 +1648,8 @@ def main(args): compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2']) parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0) - - # for the seldom used esoteric commands - subparsers = parser.add_subparsers(title="Advanced Configs (For Experts)", dest="has_advanced") - advanced_subparser = subparsers.add_parser("advanced", help="Additional settings for experts. Run 'koboldcpp.py advanced --help' for more info") - advanced_subparser.add_argument("--tensor_split", help="CUDA with ALL set only. How to split tensors across multiple GPUs, space-separated list of proportions, e.g. 3 1", type=float, nargs='+') + parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 
7 3", metavar=('[Ratios]'), type=float, nargs='+') args = parser.parse_args() + main(args) From d8d2449bfbadcd7e5371b9bf93ae159901633bd9 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 24 Jul 2023 22:46:53 +0800 Subject: [PATCH 7/8] better label (+1 squashed commits) Squashed commits: [f573b2c] cuda 3 target arch --- CMakeLists.txt | 2 +- koboldcpp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e70f7c198219..9a2c653d746ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ if (LLAMA_CUBLAS) if (LLAMA_CUDA_DMMV_F16) set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics else() - set(CMAKE_CUDA_ARCHITECTURES "37;61") # lowest CUDA 12 standard + lowest for integer intrinsics + set(CMAKE_CUDA_ARCHITECTURES "37;52;61") # lowest CUDA 12 standard + lowest for integer intrinsics endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") diff --git a/koboldcpp.py b/koboldcpp.py index 22375c1654a08..679d4c0396d11 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1642,7 +1642,7 @@ def main(args): parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true') parser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", action='store_const', const=1, default=0) parser.add_argument("--skiplauncher", help="Doesn't display or use the new GUI launcher.", action='store_true') - parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. 
Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordelength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+') + parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength, max ctxlen, API key and worker name.",metavar=('[hordemodelname]', '[hordegenlength] [hordemaxctx] [hordeapikey] [hordeworkername]'), nargs='+') compatgroup = parser.add_mutually_exclusive_group() compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true') compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) From 9731682ad6dec280d48ec496281c09820d83e1e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=93=D0=B5=D1=80=D0=BC=D0=B0=D0=BD?= Date: Mon, 24 Jul 2023 21:21:32 +0500 Subject: [PATCH 8/8] Update Makefile (#345) fix requirements for idiotic source file concatenation (lol) --- Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 3c913666b27b0..f2386457f942a 100644 --- a/Makefile +++ b/Makefile @@ -335,13 +335,16 @@ grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h $(CXX) $(CXXFLAGS) -c $< -o $@ expose.o: expose.cpp expose.h $(CXX) $(CXXFLAGS) -c $< -o $@ -gpttype_adapter_failsafe.o: gpttype_adapter.cpp + +# idiotic "for easier compilation" +GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp 
otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml.h ggml-cuda.h llama.h llama-util.h +gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@ -gpttype_adapter.o: gpttype_adapter.cpp +gpttype_adapter.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) -c $< -o $@ -gpttype_adapter_clblast.o: gpttype_adapter.cpp +gpttype_adapter_clblast.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ -gpttype_adapter_cublas.o: gpttype_adapter.cpp +gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ clean: