diff --git a/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst b/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst new file mode 100644 index 00000000000000..e7e3de7a96cbd3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst @@ -0,0 +1,2 @@ +Multiline regexes starting with a caret, such as ``re.compile("^foo", +re.MULTILINE)``, now run significantly faster. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 6e6ae46f05a50f..fadfc8c0a1a061 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1848,6 +1848,29 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) ptr++; RESET_CAPTURE_GROUP(); } + } else if (pattern[0] == SRE_OP_AT && + pattern[1] == SRE_AT_BEGINNING_LINE) { + /* pattern is anchored at the start of a line (MULTILINE "^"). + Only the start of the string and the character after a linebreak + can match, so jump from one line start to the next instead of + trying SRE(match) at every position. */ + end = (SRE_CHAR *)state->end; + TRACE(("|%p|%p|SEARCH AT_BEGINNING_LINE\n", pattern, ptr)); + state->start = state->ptr = ptr; + status = SRE(match)(state, pattern, 1); + state->must_advance = 0; + while (status == 0) { + /* skip to the next linebreak ... */ + while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) + ptr++; + if (ptr >= end) + return 0; + ptr++; /* ... and step past it, onto a line start */ + RESET_CAPTURE_GROUP(); + TRACE(("|%p|%p|SEARCH AT_BEGINNING_LINE\n", pattern, ptr)); + state->start = state->ptr = ptr; + status = SRE(match)(state, pattern, 0); + } } else { /* general case */ assert(ptr <= end);