From f21597834216dd9200d4b633c160edc22ccd47ab Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Sat, 12 Aug 2023 21:48:57 +0100 Subject: [PATCH] Switch to using thread_local regular expressions to avoid regex mutex contention --- native/Cargo.lock | 630 +++++++++--------- native/libcst/Cargo.toml | 10 +- native/libcst/benches/parser_benchmark.rs | 47 +- native/libcst/src/parser/numbers.rs | 50 +- native/libcst/src/tokenizer/core/mod.rs | 63 +- native/libcst/src/tokenizer/operators.rs | 7 +- .../libcst/src/tokenizer/text_position/mod.rs | 7 +- .../libcst/src/tokenizer/whitespace_parser.rs | 37 +- 8 files changed, 446 insertions(+), 405 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index f3114fb84..6fa683eae 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "6748e8def348ed4d14996fa801f4122cd763fff530258cdc03f64b25f89d3a5a" dependencies = [ "memchr", ] @@ -24,15 +24,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7021ce4924a3f25f802b2cccd1af585e39ea1a363a1aa2e72afe54b67a3a7a7" [[package]] -name = "atty" -version = "0.2.14" +name = "anstyle" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] +checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" [[package]] name = "autocfg" @@ -40,17 +35,32 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "basic-toml" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bfc506e7a2370ec239e1d072507b2a80c833083699d3c6fa176fbb4de8448c6" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "cast" @@ -58,6 +68,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -75,9 +94,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" dependencies = [ "ciborium-io", "ciborium-ll", @@ -86,15 +105,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" [[package]] name = "ciborium-ll" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" dependencies = [ "ciborium-io", "half", @@ -102,77 +121,44 @@ dependencies = [ [[package]] name = "clap" -version = "2.34.0" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +checksum = "1d5f1946157a96594eb2d2c10eb7ad9a2b27518cb3000209dec700c35df9197d" dependencies = [ - "bitflags", - "textwrap 0.11.0", - "unicode-width", + "clap_builder", ] [[package]] -name = "clap" -version = "3.2.22" +name = "clap_builder" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750" +checksum = "78116e32a042dd73c2901f0dc30790d20ff3447f3e3472fad359e8c3d282bcd6" dependencies = [ - "bitflags", + "anstyle", "clap_lex", - "indexmap", - "textwrap 0.15.1", ] [[package]] name = "clap_lex" -version = "0.2.4" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] +checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" [[package]] name = "criterion" -version = "0.3.6" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" -dependencies = [ - "atty", - "cast", - "clap 2.34.0", - "criterion-plot 0.4.5", - "csv", - "itertools", - "lazy_static", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_cbor", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", - "atty", "cast", "ciborium", - "clap 3.2.22", - "criterion-plot 0.5.0", - "itertools", - "lazy_static", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", "num-traits", + "once_cell", "oorandom", "plotters", "rayon", @@ -186,21 +172,11 @@ dependencies = [ [[package]] name = "criterion-cycles-per-byte" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07175eab62c9054f8828955f9a84ddd3f732f796ee99fb4898453d60be4bcbdc" -dependencies = [ - "criterion 0.3.6", -] - -[[package]] -name = "criterion-plot" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +checksum = "13929cb9262ca80bcd63a664f74e9ca958bef631c3e50a3117c4069119fd9628" dependencies = [ - "cast", - "itertools", + "criterion", ] [[package]] @@ -210,14 +186,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] name = "crossbeam-channel" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -225,9 +201,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -236,66 +212,64 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.9" +version = "0.9.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", - "once_cell", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.10" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ "cfg-if", - "once_cell", ] [[package]] -name = "csv" -version = "1.2.2" +name = "difference" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] -name = "csv-core" -version = "0.1.10" +name = "either" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] -name = "difference" -version = "2.0.0" +name = "errno" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys", +] [[package]] -name = "either" -version = "1.6.1" +name = "errno-dragonfly" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] [[package]] name = "glob" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "half" @@ -304,75 +278,61 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] -name = "hashbrown" -version = "0.12.3" +name = "hermit-abi" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "indoc" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] +checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" [[package]] -name = "indexmap" -version = "1.9.1" +name = "is-terminal" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "autocfg", - "hashbrown", + "hermit-abi", + "rustix", + "windows-sys", ] [[package]] -name = "indoc" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e" - -[[package]] -name = "instant" -version = "0.1.12" +name = "itertools" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ - "cfg-if", + "either", ] [[package]] name = "itertools" -version = "0.10.5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.2" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" -version = "0.3.58" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "libc" version = "0.2.147" @@ -384,15 +344,15 @@ name = "libcst" version = "0.1.0" dependencies = [ "chic", - "criterion 0.4.0", + "criterion", "criterion-cycles-per-byte", "difference", - "itertools", + "itertools 0.11.0", "libcst_derive", - "once_cell", "paste", "peg", "pyo3", + "rayon", "regex", "thiserror", ] @@ -402,15 +362,21 @@ name = "libcst_derive" version = "0.1.0" dependencies = [ "quote", - "syn", + "syn 1.0.109", "trybuild", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" + [[package]] name = "lock_api" -version = "0.4.7" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" dependencies = [ "autocfg", "scopeguard", @@ -418,12 +384,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "memchr" @@ -433,27 +396,27 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" -version = "0.6.5" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" -version = "1.13.1" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ "hermit-abi", "libc", @@ -461,9 +424,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.16.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "oorandom" @@ -471,42 +434,34 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" -[[package]] -name = "os_str_bytes" -version = "6.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" - [[package]] name = "parking_lot" -version = "0.11.2" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ - "instant", "lock_api", "parking_lot_core", ] [[package]] name = "parking_lot_core" -version = "0.8.5" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", - "instant", "libc", "redox_syscall", "smallvec", - "winapi", + "windows-targets", ] [[package]] name = "paste" -version = "1.0.9" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "peg" @@ -537,9 +492,9 @@ checksum = "9fa00462b37ead6d11a82c9d568b26682d78e0477dc02d1966c013af80969739" [[package]] name = "plotters" -version = "0.3.1" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" dependencies = [ "num-traits", "plotters-backend", @@ -550,33 +505,33 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.2" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" [[package]] name = "plotters-svg" -version = "0.3.1" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" dependencies = [ "plotters-backend", ] [[package]] name = "proc-macro2" -version = "1.0.46" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.17.3" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "268be0c73583c183f2b14052337465768c07726936a260f480f0857cb95ba543" +checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" dependencies = [ "cfg-if", "indoc", @@ -591,9 +546,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.17.3" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28fcd1e73f06ec85bf3280c48c67e731d8290ad3d730f8be9dc07946923005c8" +checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" dependencies = [ "once_cell", "target-lexicon", @@ -601,9 +556,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.17.3" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f6cb136e222e49115b3c51c32792886defbfb0adead26a688142b346a0b9ffc" +checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" dependencies = [ "libc", "pyo3-build-config", @@ -611,53 +566,51 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.17.3" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94144a1266e236b1c932682136dc35a9dee8d3589728f68130c7c3861ef96b28" +checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "pyo3-macros-backend" -version = "0.17.3" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8df9be978a2d2f0cdebabb03206ed73b11314701a5bfe71b0d753b81997777f" +checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.5.3" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ - "autocfg", - "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" -version = "1.9.3" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -667,18 +620,30 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.13" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] name = "regex" -version = "1.7.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" dependencies = [ "aho-corasick", "memchr", @@ -687,15 +652,28 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" + +[[package]] +name = "rustix" +version = "0.38.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" +dependencies = [ + "bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] [[package]] name = "ryu" -version = "1.0.10" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -708,45 +686,35 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.145" +version = "1.0.187" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +checksum = "30a7fe14252655bd1e578af19f5fa00fe02fd0013b100ca6b49fde31c41bae4c" dependencies = [ "serde_derive", ] -[[package]] -name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half", - "serde", -] - [[package]] name = "serde_derive" -version = "1.0.145" +version = "1.0.187" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +checksum = "e46b2a6ca578b3f1d4501b12f78ed4692006d79d82a1a7c561c12dbc3d625eb8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.29", ] [[package]] name = "serde_json" -version = "1.0.81" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" dependencies = [ "itoa", "ryu", @@ -755,15 +723,15 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.8.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc88c725d61fc6c3132893370cac4a0200e3fedf5da8331c570664b1987f5ca2" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" [[package]] name = "syn" -version = "1.0.103" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", @@ -771,53 +739,49 @@ dependencies = [ ] [[package]] -name = "target-lexicon" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1" - -[[package]] -name = "termcolor" -version = "1.1.3" +name = "syn" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ - "winapi-util", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "target-lexicon" +version = "0.12.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] +checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a" [[package]] -name = "textwrap" -version = "0.15.1" +name = "termcolor" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +dependencies = [ + "winapi-util", +] [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.29", ] [[package]] @@ -830,64 +794,48 @@ dependencies = [ "serde_json", ] -[[package]] -name = "toml" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" -dependencies = [ - "serde", -] - [[package]] name = "trybuild" -version = "1.0.71" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea496675d71016e9bc76aa42d87f16aefd95447cc5818e671e12b2d7e269075d" +checksum = "6df60d81823ed9c520ee897489573da4b1d79ffbe006b8134f46de1a1aa03555" dependencies = [ + "basic-toml", "glob", "once_cell", "serde", "serde_derive", "serde_json", "termcolor", - "toml", ] [[package]] name = "unicode-ident" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" - -[[package]] -name = "unicode-width" -version = "0.1.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unindent" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44" +checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] [[package]] name = "wasm-bindgen" -version = "0.2.81" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -895,24 +843,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.81" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", - "lazy_static", "log", + "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.29", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.81" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -920,28 +868,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.81" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.29", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.81" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "web-sys" -version = "0.3.58" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -977,3 +925,69 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/native/libcst/Cargo.toml b/native/libcst/Cargo.toml index 2e0ebb1c2..ab799b556 100644 --- a/native/libcst/Cargo.toml +++ b/native/libcst/Cargo.toml @@ -34,17 +34,17 @@ pyo3 = { version = ">=0.17", optional = true } thiserror = "1.0.37" peg = "0.8.1" chic = "1.2.2" -itertools = "0.10.5" -once_cell = "1.16.0" -regex = "1.7.0" +regex = "1.9.3" libcst_derive = { path = "../libcst_derive" } [dev-dependencies] -criterion = { version = "0.4.0", features = ["html_reports"] } +criterion = { version = "0.5.1", features = ["html_reports"] } difference = "2.0.0" +rayon = "1.7.0" +itertools = "0.11.0" [target.'cfg(target_arch = "x86_64")'.dev-dependencies] -criterion-cycles-per-byte = "0.1" +criterion-cycles-per-byte = "0.5.0" [[bench]] name = "parser_benchmark" diff --git a/native/libcst/benches/parser_benchmark.rs b/native/libcst/benches/parser_benchmark.rs index d9e2b2266..74f049c4c 100644 --- a/native/libcst/benches/parser_benchmark.rs +++ b/native/libcst/benches/parser_benchmark.rs @@ -8,14 +8,14 @@ use std::{ time::Duration, }; -use criterion::{ - black_box, criterion_group, criterion_main, measurement::Measurement, BatchSize, Criterion, -}; +use criterion::{BatchSize, BenchmarkId, black_box, Criterion, criterion_group, criterion_main, measurement::Measurement, Throughput}; +use itertools::Itertools; +use rayon::prelude::*; + #[cfg(target_arch = "x86_64")] use criterion_cycles_per_byte::CyclesPerByte; -use itertools::Itertools; use libcst_native::{ - parse_module, parse_tokens_without_whitespace, tokenize, Codegen, Config, Inflate, + Codegen, Config, Inflate, parse_module, parse_tokens_without_whitespace, tokenize, }; #[cfg(not(windows))] @@ -23,7 +23,7 @@ const NEWLINE: &str = "\n"; #[cfg(windows)] const NEWLINE: &str = "\r\n"; -fn load_all_fixtures() -> String { +fn load_all_fixtures_vec() -> Vec { let mut path = PathBuf::from(file!()); path.pop(); path.pop(); @@ -44,7 +44,11 @@ fn load_all_fixtures() -> String { let path = file.unwrap().path(); std::fs::read_to_string(&path).expect("reading_file") }) - .join(NEWLINE) + .collect() +} + +fn load_all_fixtures() -> String { + load_all_fixtures_vec().join(NEWLINE) } pub fn inflate_benchmarks(c: &mut Criterion) { @@ -119,6 +123,33 @@ pub fn parse_into_cst_benchmarks(c: &mut Criterion) { group.finish(); } +pub fn parse_into_cst_multithreaded_benchmarks(c: &mut Criterion) where ::Value: Send { + let fixtures = load_all_fixtures_vec(); + let mut group = c.benchmark_group("parse_into_cst_parallel"); + group.measurement_time(Duration::from_secs(15)); + group.warm_up_time(Duration::from_secs(5)); + + for thread_count in 1..10 { + let expanded_fixtures = (0..thread_count).flat_map(|_| fixtures.clone()).collect_vec(); + group.throughput(Throughput::Elements(expanded_fixtures.len() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(thread_count), &thread_count, |b, thread_count| { + let thread_pool = rayon::ThreadPoolBuilder::new() + .num_threads(*thread_count).build().unwrap(); + thread_pool.install(|| { + b.iter_with_large_drop( + || { + expanded_fixtures.par_iter() + .map(|contents| black_box(parse_module(&contents, None))) + .collect::>() + }, + ); + }); + }); + } + + group.finish(); +} + #[cfg(target_arch = "x86_64")] fn get_config() -> Criterion { // criterion_cycles_per_byte is only supported on x86 @@ -133,6 +164,6 @@ fn get_config() -> Criterion { criterion_group!( name=benches; config=get_config(); - targets=parser_benchmarks, codegen_benchmarks, inflate_benchmarks, tokenize_benchmarks, parse_into_cst_benchmarks + targets=parse_into_cst_multithreaded_benchmarks ); criterion_main!(benches); diff --git a/native/libcst/src/parser/numbers.rs b/native/libcst/src/parser/numbers.rs index 6d7a0d8e7..95db532bf 100644 --- a/native/libcst/src/parser/numbers.rs +++ b/native/libcst/src/parser/numbers.rs @@ -3,7 +3,6 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree -use once_cell::sync::Lazy; use regex::Regex; use crate::nodes::deflated::{Expression, Float, Imaginary, Integer}; @@ -13,51 +12,48 @@ static BIN: &str = r"0[bB](?:_?[01])+"; static OCT: &str = r"0[oO](?:_?[0-7])+"; static DECIMAL: &str = r"(?:0(?:_?0)*|[1-9](?:_?[0-9])*)"; -static INTEGER_RE: Lazy = Lazy::new(|| { - Regex::new(format!("^({}|{}|{}|{})$", HEX, BIN, OCT, DECIMAL).as_str()).expect("regex") -}); - static EXPONENT: &str = r"[eE][-+]?[0-9](?:_?[0-9])*"; // Note: these don't exactly match the python implementation (exponent is not included) static POINT_FLOAT: &str = r"([0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?|\.[0-9](?:_?[0-9])*)"; static EXP_FLOAT: &str = r"[0-9](?:_?[0-9])*"; -static FLOAT_RE: Lazy = Lazy::new(|| { - Regex::new( - format!( - "^({}({})?|{}{})$", - POINT_FLOAT, EXPONENT, EXP_FLOAT, EXPONENT +thread_local! { + static INTEGER_RE: Regex = + Regex::new(format!("^({}|{}|{}|{})$", HEX, BIN, OCT, DECIMAL).as_str()).expect("regex"); + static FLOAT_RE: Regex = + Regex::new( + format!( + "^({}({})?|{}{})$", + POINT_FLOAT, EXPONENT, EXP_FLOAT, EXPONENT + ) + .as_str(), ) - .as_str(), - ) - .expect("regex") -}); - -static IMAGINARY_RE: Lazy = Lazy::new(|| { - Regex::new( - format!( - r"^([0-9](?:_?[0-9])*[jJ]|({}({})?|{}{})[jJ])$", - POINT_FLOAT, EXPONENT, EXP_FLOAT, EXPONENT + .expect("regex"); + static IMAGINARY_RE: Regex = + Regex::new( + format!( + r"^([0-9](?:_?[0-9])*[jJ]|({}({})?|{}{})[jJ])$", + POINT_FLOAT, EXPONENT, EXP_FLOAT, EXPONENT + ) + .as_str(), ) - .as_str(), - ) - .expect("regex") -}); + .expect("regex"); +} pub(crate) fn parse_number(raw: &str) -> Expression { - if INTEGER_RE.is_match(raw) { + if INTEGER_RE.with(|r| r.is_match(raw)) { Expression::Integer(Box::new(Integer { value: raw, lpar: Default::default(), rpar: Default::default(), })) - } else if FLOAT_RE.is_match(raw) { + } else if FLOAT_RE.with(|r| r.is_match(raw)) { Expression::Float(Box::new(Float { value: raw, lpar: Default::default(), rpar: Default::default(), })) - } else if IMAGINARY_RE.is_match(raw) { + } else if IMAGINARY_RE.with(|r| r.is_match(raw)) { Expression::Imaginary(Box::new(Imaginary { value: raw, lpar: Default::default(), diff --git a/native/libcst/src/tokenizer/core/mod.rs b/native/libcst/src/tokenizer/core/mod.rs index 359451a3f..2365eaa36 100644 --- a/native/libcst/src/tokenizer/core/mod.rs +++ b/native/libcst/src/tokenizer/core/mod.rs @@ -58,7 +58,6 @@ /// [RustPython's parser]: https://crates.io/crates/rustpython-parser mod string_types; -use once_cell::sync::Lazy; use regex::Regex; use std::cell::RefCell; use std::cmp::Ordering; @@ -83,25 +82,27 @@ const MAX_INDENT: usize = 100; // https://github.com/rust-lang/rust/issues/71763 const MAX_CHAR: char = '\u{10ffff}'; -static SPACE_TAB_FORMFEED_RE: Lazy = Lazy::new(|| Regex::new(r"\A[ \f\t]+").expect("regex")); -static ANY_NON_NEWLINE_RE: Lazy = Lazy::new(|| Regex::new(r"\A[^\r\n]+").expect("regex")); -static STRING_PREFIX_RE: Lazy = - Lazy::new(|| Regex::new(r"\A(?i)(u|[bf]r|r[bf]|r|b|f)").expect("regex")); -static POTENTIAL_IDENTIFIER_TAIL_RE: Lazy = - Lazy::new(|| Regex::new(r"\A([a-zA-Z0-9_]|[^\x00-\x7f])+").expect("regex")); -static DECIMAL_DOT_DIGIT_RE: Lazy = Lazy::new(|| Regex::new(r"\A\.[0-9]").expect("regex")); -static DECIMAL_TAIL_RE: Lazy = - Lazy::new(|| Regex::new(r"\A[0-9](_?[0-9])*").expect("regex")); -static HEXADECIMAL_TAIL_RE: Lazy = - Lazy::new(|| Regex::new(r"\A(_?[0-9a-fA-F])+").expect("regex")); -static OCTAL_TAIL_RE: Lazy = Lazy::new(|| Regex::new(r"\A(_?[0-7])+").expect("regex")); -static BINARY_TAIL_RE: Lazy = Lazy::new(|| Regex::new(r"\A(_?[01])+").expect("regex")); - -/// Used to verify identifiers when there's a non-ascii character in them. -// This changes across unicode revisions. We'd need to ship our own unicode tables to 100% match a -// given Python version's behavior. -static UNICODE_IDENTIFIER_RE: Lazy = - Lazy::new(|| Regex::new(r"\A[\p{XID_Start}_]\p{XID_Continue}*\z").expect("regex")); +thread_local! { + static SPACE_TAB_FORMFEED_RE: Regex = Regex::new(r"\A[ \f\t]+").expect("regex"); + static ANY_NON_NEWLINE_RE: Regex = Regex::new(r"\A[^\r\n]+").expect("regex"); + static STRING_PREFIX_RE: Regex = + Regex::new(r"\A(?i)(u|[bf]r|r[bf]|r|b|f)").expect("regex"); + static POTENTIAL_IDENTIFIER_TAIL_RE: Regex = + Regex::new(r"\A([a-zA-Z0-9_]|[^\x00-\x7f])+").expect("regex"); + static DECIMAL_DOT_DIGIT_RE: Regex = Regex::new(r"\A\.[0-9]").expect("regex"); + static DECIMAL_TAIL_RE: Regex = + Regex::new(r"\A[0-9](_?[0-9])*").expect("regex"); + static HEXADECIMAL_TAIL_RE: Regex = + Regex::new(r"\A(_?[0-9a-fA-F])+").expect("regex"); + static OCTAL_TAIL_RE: Regex = Regex::new(r"\A(_?[0-7])+").expect("regex"); + static BINARY_TAIL_RE: Regex = Regex::new(r"\A(_?[01])+").expect("regex"); + + /// Used to verify identifiers when there's a non-ascii character in them. + // This changes across unicode revisions. We'd need to ship our own unicode tables to 100% match a + // given Python version's behavior. + static UNICODE_IDENTIFIER_RE: Regex = + Regex::new(r"\A[\p{XID_Start}_]\p{XID_Continue}*\z").expect("regex"); +} #[derive(Debug, Eq, PartialEq, Copy, Clone)] pub enum TokType { @@ -316,11 +317,11 @@ impl<'t> TokState<'t> { 'again: loop { // Skip spaces - self.text_pos.consume(&*SPACE_TAB_FORMFEED_RE); + SPACE_TAB_FORMFEED_RE.with(|v| self.text_pos.consume(v)); // Skip comment, unless it's a type comment if self.text_pos.peek() == Some('#') { - self.text_pos.consume(&*ANY_NON_NEWLINE_RE); + ANY_NON_NEWLINE_RE.with(|v| self.text_pos.consume(v)); // type_comment is not supported } @@ -384,7 +385,7 @@ impl<'t> TokState<'t> { } // Number starting with period - Some('.') if self.text_pos.matches(&*DECIMAL_DOT_DIGIT_RE) => { + Some('.') if DECIMAL_DOT_DIGIT_RE.with(|r| self.text_pos.matches(r)) => { self.consume_number(NumberState::Fraction) } @@ -472,7 +473,7 @@ impl<'t> TokState<'t> { } // Operator - Some(_) if self.text_pos.consume(&*OPERATOR_RE) => Ok(TokType::Op), + Some(_) if OPERATOR_RE.with(|r| self.text_pos.consume(r)) => Ok(TokType::Op), // Bad character // If nothing works, fall back to this error. CPython returns an OP in this case, @@ -623,7 +624,7 @@ impl<'t> TokState<'t> { fn consume_identifier_or_prefixed_string(&mut self) -> Result> { // Process the various legal combinations of b"", r"", u"", and f"". - if self.text_pos.consume(&*STRING_PREFIX_RE) { + if STRING_PREFIX_RE.with(|r| self.text_pos.consume(r)) { if let Some('"') | Some('\'') = self.text_pos.peek() { // We found a string, not an identifier. Bail! if self.split_fstring @@ -645,7 +646,7 @@ impl<'t> TokState<'t> { Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('\u{80}'..=MAX_CHAR) )); } - self.text_pos.consume(&*POTENTIAL_IDENTIFIER_TAIL_RE); + POTENTIAL_IDENTIFIER_TAIL_RE.with(|r| self.text_pos.consume(r)); let identifier_str = self.text_pos.slice_from_start_pos(&self.start_pos); if !verify_identifier(identifier_str) { // TODO: async/await @@ -691,7 +692,7 @@ impl<'t> TokState<'t> { match self.text_pos.peek() { Some('x') | Some('X') => { self.text_pos.next(); - if !self.text_pos.consume(&*HEXADECIMAL_TAIL_RE) + if !HEXADECIMAL_TAIL_RE.with(|r| self.text_pos.consume(r)) || self.text_pos.peek() == Some('_') { Err(TokError::BadHexadecimal) @@ -701,7 +702,7 @@ impl<'t> TokState<'t> { } Some('o') | Some('O') => { self.text_pos.next(); - if !self.text_pos.consume(&*OCTAL_TAIL_RE) + if !OCTAL_TAIL_RE.with(|r| self.text_pos.consume(r)) || self.text_pos.peek() == Some('_') { return Err(TokError::BadOctal); @@ -715,7 +716,7 @@ impl<'t> TokState<'t> { } Some('b') | Some('B') => { self.text_pos.next(); - if !self.text_pos.consume(&*BINARY_TAIL_RE) + if !BINARY_TAIL_RE.with(|r| self.text_pos.consume(r)) || self.text_pos.peek() == Some('_') { return Err(TokError::BadBinary); @@ -819,7 +820,7 @@ impl<'t> TokState<'t> { /// Processes a decimal tail. This is the bit after the dot or after an E in a float. fn consume_decimal_tail(&mut self) -> Result<(), TokError<'t>> { - let result = self.text_pos.consume(&*DECIMAL_TAIL_RE); + let result = DECIMAL_TAIL_RE.with(|r| self.text_pos.consume(r)); // Assumption: If we've been called, the first character is an integer, so we must have a // regex match debug_assert!(result, "try_decimal_tail was called on a non-digit char"); @@ -1058,7 +1059,7 @@ fn verify_identifier(name: &str) -> bool { // TODO: If `name` is non-ascii, must first normalize name to NFKC. // Common case: If the entire string is ascii, we can avoid the more expensive regex check, // since the tokenizer already validates ascii characters before calling us. - name.is_ascii() || UNICODE_IDENTIFIER_RE.is_match(name) + name.is_ascii() || UNICODE_IDENTIFIER_RE.with(|r| r.is_match(name)) } #[derive(Clone)] diff --git a/native/libcst/src/tokenizer/operators.rs b/native/libcst/src/tokenizer/operators.rs index e5ef1526f..3252f774c 100644 --- a/native/libcst/src/tokenizer/operators.rs +++ b/native/libcst/src/tokenizer/operators.rs @@ -8,7 +8,6 @@ // code or that we retain the original work's copyright information. // https://docs.python.org/3/license.html#zero-clause-bsd-license-for-code-in-the-python-release-documentation -use once_cell::sync::Lazy; use regex::Regex; /// A list of strings that make up all the possible operators in a specific version of Python. @@ -69,7 +68,8 @@ pub const OPERATORS: &[&str] = &[ "<>", ]; -pub static OPERATOR_RE: Lazy = Lazy::new(|| { +thread_local! { +pub static OPERATOR_RE: Regex = { // sort operators so that we try to match the longest ones first let mut sorted_operators: Box<[&str]> = OPERATORS.into(); sorted_operators.sort_unstable_by_key(|op| usize::MAX - op.len()); @@ -82,4 +82,5 @@ pub static OPERATOR_RE: Lazy = Lazy::new(|| { .join("|") )) .expect("regex") -}); +}; +} diff --git a/native/libcst/src/tokenizer/text_position/mod.rs b/native/libcst/src/tokenizer/text_position/mod.rs index 2e58600a2..fece9e3d3 100644 --- a/native/libcst/src/tokenizer/text_position/mod.rs +++ b/native/libcst/src/tokenizer/text_position/mod.rs @@ -5,14 +5,15 @@ mod char_width; -use once_cell::sync::Lazy; use regex::Regex; use std::fmt; use crate::tokenizer::debug_utils::EllipsisDebug; use char_width::NewlineNormalizedCharWidths; -static CR_OR_LF_RE: Lazy = Lazy::new(|| Regex::new(r"[\r\n]").expect("regex")); +thread_local! { + static CR_OR_LF_RE: Regex = Regex::new(r"[\r\n]").expect("regex"); +} pub trait TextPattern { fn match_len(&self, text: &str) -> Option; @@ -98,7 +99,7 @@ impl<'t> TextPosition<'t> { match match_len { Some(match_len) => { assert!( - !CR_OR_LF_RE.is_match(&rest_of_text[..match_len]), + !CR_OR_LF_RE.with(|r| r.is_match(&rest_of_text[..match_len])), "matches pattern must not match a newline", ); true diff --git a/native/libcst/src/tokenizer/whitespace_parser.rs b/native/libcst/src/tokenizer/whitespace_parser.rs index f09ce7895..be5b77528 100644 --- a/native/libcst/src/tokenizer/whitespace_parser.rs +++ b/native/libcst/src/tokenizer/whitespace_parser.rs @@ -7,7 +7,6 @@ use crate::nodes::{ Comment, EmptyLine, Fakeness, Newline, ParenthesizableWhitespace, ParenthesizedWhitespace, SimpleWhitespace, TrailingWhitespace, }; -use once_cell::sync::Lazy; use regex::Regex; use thiserror::Error; @@ -15,10 +14,12 @@ use crate::Token; use super::TokType; -static SIMPLE_WHITESPACE_RE: Lazy = - Lazy::new(|| Regex::new(r"\A([ \f\t]|\\(\r\n?|\n))*").expect("regex")); -static NEWLINE_RE: Lazy = Lazy::new(|| Regex::new(r"\A(\r\n?|\n)").expect("regex")); -static COMMENT_RE: Lazy = Lazy::new(|| Regex::new(r"\A#[^\r\n]*").expect("regex")); +thread_local! { + static SIMPLE_WHITESPACE_RE: Regex = Regex::new(r"\A([ \f\t]|\\(\r\n?|\n))*").expect("regex"); +static NEWLINE_RE: Regex = Regex::new(r"\A(\r\n?|\n)").expect("regex"); +static COMMENT_RE: Regex = Regex::new(r"\A#[^\r\n]*").expect("regex"); +static NEWLINE_RE_2: Regex = Regex::new(r"\r\n?|\n").expect("regex"); +} #[allow(clippy::upper_case_acronyms, clippy::enum_variant_names)] #[derive(Error, Debug, PartialEq, Eq)] @@ -73,11 +74,8 @@ impl<'a> Config<'a> { break; } } - let default_newline = Regex::new(r"\r\n?|\n") - .expect("regex") - .find(input) - .map(|m| m.as_str()) - .unwrap_or("\n"); + let default_newline = + NEWLINE_RE_2.with(|r| r.find(input).map(|m| m.as_str()).unwrap_or("\n")); Self { input, @@ -200,9 +198,8 @@ pub fn parse_empty_lines<'a>( } pub fn parse_comment<'a>(config: &Config<'a>, state: &mut State) -> Result>> { - if let Some(comment_match) = - COMMENT_RE.find(config.get_line_after_column(state.line, state.column_byte)?) - { + let newline_after = config.get_line_after_column(state.line, state.column_byte)?; + if let Some(comment_match) = COMMENT_RE.with(|r| r.find(newline_after)) { let comment_str = comment_match.as_str(); advance_this_line( config, @@ -216,9 +213,8 @@ pub fn parse_comment<'a>(config: &Config<'a>, state: &mut State) -> Result(config: &Config<'a>, state: &mut State) -> Result>> { - if let Some(newline_match) = - NEWLINE_RE.find(config.get_line_after_column(state.line, state.column_byte)?) - { + let newline_after = config.get_line_after_column(state.line, state.column_byte)?; + if let Some(newline_match) = NEWLINE_RE.with(|r| r.find(newline_after)) { let newline_str = newline_match.as_str(); advance_this_line( config, @@ -350,10 +346,11 @@ pub fn parse_simple_whitespace<'a>( let capture_ws = |line, col| -> Result<&'a str> { let x = config.get_line_after_column(line, col); let x = x?; - Ok(SIMPLE_WHITESPACE_RE - .find(x) - .expect("SIMPLE_WHITESPACE_RE supports 0-length matches, so it must always match") - .as_str()) + Ok(SIMPLE_WHITESPACE_RE.with(|r| { + r.find(x) + .expect("SIMPLE_WHITESPACE_RE supports 0-length matches, so it must always match") + .as_str() + })) }; let start_offset = state.byte_offset; let mut prev_line: &str;