diff --git a/tests/test_re_by_construction.py b/tests/test_re_by_construction.py index 8755bf6..d352284 100644 --- a/tests/test_re_by_construction.py +++ b/tests/test_re_by_construction.py @@ -10,6 +10,7 @@ from functools import partial from hypothesis import given, strategies as st +from hypothesis.errors import InvalidArgument IS_PYPY = hasattr(sys, "pypy_version_info") @@ -154,13 +155,24 @@ def non_matching_string(self, draw, state): if not any(isinstance(x, tuple) for x in self.elements): # easy case, only chars return draw(st.characters(blacklist_characters=self.elements)) + + # Now, we *could* iterate through to get the set of all allowed characters, + # but that would be pretty slow. Instead, we just get the highest and lowest + # allowed codepoints and stay outside that interval, blacklisting individual + # characters. If we allow both chr(0) and chr(maxunicode), just give up. chars = "".join(x for x in self.elements if not isinstance(x, tuple)) - range_stops = [ord(x[1]) for x in self.elements if isinstance(x, tuple)] - max_stop = max(range_stops) - res = draw( - st.characters(min_codepoint=max_stop + 1, blacklist_characters=chars) - ) - return res + low = min(ord(x[0]) for x in self.elements if isinstance(x, tuple)) + high = max(ord(x[1]) for x in self.elements if isinstance(x, tuple)) + strat = st.nothing() + if low > 0: + strat |= st.characters(max_codepoint=low - 1, blacklist_characters=chars) + if high < sys.maxunicode: + strat |= st.characters(min_codepoint=high + 1, blacklist_characters=chars) + try: + return draw(strat) + except InvalidArgument: + assert strat.is_empty + raise CantGenerateNonMatching def build_re(self): res = []