Python 3370

Ich wollte, dass dies so gut wie möglich funktioniert, und brachte sogar Abwechslung zum Laufen (mit korrekter Überprüfung der Doppelzählung!). Soweit ich weiß, funktioniert das für alles außer für Lookarounds (weil das verrückt wäre).

Wenn Sie eine eigene Lösung schreiben, können Sie meine Methoden nach Belieben anwenden / verbessern.

Code:

R=range;L=len;E=enumerate;I=int;F="Infinity";S=sum;H=chr;D=ord;J=lambda l:''.join(l);U=lambda s:J([H(i)if H(i) not in s else''for i in R(1,128)]);s=' \n\t';c=J((H(i)if H(i)not in s else'')for i in R(1,32));d,l,u=[J(H(i)for i in R(*n))for n in [[48,59],[97,123],[65,91]]];p='`~!@#$%^&*()-_=+[{]}\\|;:\'",<.>/?';Y={'\\s':s,'\\S':c+s+p+u+l+d,'\\d':d,'\\D':U(d),'\\w':u+l+'_','\\W':U(u+l+'_'),'[:alnum:]':u+l+d,'[:alpha:]':u+l,'[:ascii:]':U(''),'[:blank:]':' \t','[:cntrl:]':c,'[:digit:]':d,'[:graph:]':p+d+l+u,'[:lower:]':l,'[:print:]':p+d+l+u+s,'[:punct:]':p,'[:space:]':s,'[:upper:]':u,'[:word:]':l+u+'_','[:xdigit:]':d+'ABCDEF'};C=lambda l,n:[d+[l[i]]for i in R(L(l))for d in C(l[i+1:],n-1)]if n>0 else[[]];O=lambda x:S([all((c in s)for s in [(e[1]if any(e[1] not in Y for e in x)else Y[e[1]])if e[0]=='e'else(e[1]if e[0]=='t'else((Y[e[1]]if e[1]in Y else B(e[1]))if e[0]=='['else''))for e in x])for c in Y['[:ascii:]']])
def X(r):
 x=[];l=0;c=''
 for h in r:
    l+=I(h in'([')-I(h in')]')
    if l<1 and'|'==h:x+=[c];c=''
    else:c+=h
 x+=[c]
 if L(x)>1:
    o=0;m=[];u=[]
    for e in x:
     p,b=X(e)
     if p==F:return F,[]
     o+=p;m+=[('(',b)];u+=[b]
    return o-S([(-1)**(i%2)*T(s) for i in R(2,L(u)+1)for s in C(u,i)]),[('|',m)]
 u=[];o=[];m=[];w=1
 while r!='':
    h=r[0];z=0
    if h in'([{':
     l=1;i=1
     while l>0:k=r[i];l+=(k==h)-(k=={'(':')','[':']','{':'}'}[h]);i+=1
     u+=[(h,r[1:i-1])];z=i
    elif h=='\\':u+=[('e','\\'+eval("'\\%s'"%r[1]))]if r[1]in'nt'else[('e','\\'+r[1])];z=2
    elif h in ['*','+','?']:u+=[('{',h)];z=1
    elif h in'^$':return 0
    elif h=='.':u+=[('[','[:ascii:]')];z=1
    else:u+=[('t',h)];z=1
    r=r[z:]
 for t,g in u:
    if t=='(':p,b=X(g);o+=[p];m+=[('(',b)]
    elif t=='e':o+=[L(Y[g])]if g in Y else[1]
    elif t=='[':o+=[N(g)]
    elif t=='{':
     n=o[-1]
     try:o[-1]=S([n**r for r in Q(g)])
     except:return F,[]
    elif t=='t':o+=[1]
    if t!='(':m+=[(t,g)]
 for c in o:
    if c==F:return F,[]
    w*=c
 return w,m
def N(s):
 if s in Y:return L(Y[s])
 if(s[0],s[-1])==('[',']'):return 1
 n=(s[0]=='^');a=0
 if n:s=s[1:]
 while s!='':
    if L(s)>=3 and s[1]=='-':a+=D(s[2])-D(s[0])+1;s=s[3:];continue
    a+=1;s=s[1:]
 return 256*n+(-1)**n*a
def Q(s):
 if s=='?':return[0,1]
 if s[0]in'*+':return None
 if ','in s:
    l,h=s.split(',')
    return None if h==''else R(I(l),I(h)+1)
 return[I(s)]
def B(s):
 n=(s[0]=='^')
 if n:s=s[1:]
 a='';w=''
 while s!='':
    h=s[0]
    if 3<=L(s)and'-'==s[1]:
     for i in R(D(s[0]),D(s[2])+1):a+=H(i)
     s=s[3:];continue
    a+=h;s=s[1:]
 return J([c*(c not in a)for c in Y['[:ascii:]']])if n else a
def T(x):
 if all(e==[] for e in x):return 1
 for i,e in E(x):
    if L(e)>=2 and e[1][0]=='{':return S([T([[e[0]]*n+e[2:]]+x[:i]+x[i+1:])for n in Q(e[1][1])])
    if L(e)>=1:
     if e[0][0] == '(':return T([e[0][1]+e[1:]]+x[:i]+x[i+1:])
     if e[0][0]== '|':
        t=S(T([[s]+e[1:]]+x[:i]+x[i+1:])for s in e[0][1])
        u=[[s]for s in e[0][1]]
        return t-S((-1)**(j%2)*T(b)for j in R(2,L(u)+1)for b in C(u,j))
 if any(e==[]for e in x):return 0
 return O([e[0]for e in x])*T([e[1:]for e in x])
r=raw_input();e=[];l=0;c=''
for h in r:
 l+=I(h in'([')-I(h in')]')
 if l<1 and'|'==h:e+=[c];c=''
 else:c+=h
e+=[c];o=[];x=[]
for f in e:
 if '^'!=f[0]or'$'!=f[-1]:print F;quit()
 n,m=X(f[1:-1])
 if n==F:print F;quit()
 o+=[n];x+=[m]
print S(o)-S([(-1)**(i%2)*T(s) for i in R(2,L(x)+1)for s in C(x,i)])

Ungolfed:

controlchars = ''
for i in range(1,32):
    if chr(i) not in '\t\n':
        controlchars += chr(i)

CLASSES={
'\\s':' \t\n',
'\\S':controlchars+'!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'\\d':'0123456789',
'\\D':controlchars+'\n\t !"#$%&\'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'\\w':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_',
'\\W':controlchars+'\n\t !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~',
'[:alnum:]':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
'[:alpha:]':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
'[:ascii:]':controlchars+'\n\t !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'[:blank:]':' \t',
'[:cntrl:]':controlchars,
'[:digit:]':'0123456789',
'[:graph:]':'!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'[:lower:]':'abcdefghijklmnopqrstuvwxyz',
'[:print:]':'\n\t !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'[:punct:]':'`~!@#$%^&*()-_=+[{]}\\|;:\'",<.>/?',
'[:space:]':' \t\n',
'[:upper:]':'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
'[:word:]':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_',
'[:xdigit:]':'0123456789ABCDEF'
}

DELIMIT = {
'(':')',
')':'(',
'[':']',
']':'[',
'{':'}',
'}':'{'
}

def combos(lst,num):
    if num == 0:
        return [[]]
    combs = []
    for i in range(len(lst)):
        for c in combos(lst[i+1:],num-1):
            combs.append([lst[i]]+c)
    return combs

def count_regex(regex):
    exprs = []
    level = 0
    current = ''
    for char in regex:
        if char in '([':
            level += 1
        if char in ')]':
            level -= 1
        if char == '|' and level == 0:
            exprs.append(current)
            current = ''
        else:
            current += char
    exprs.append(current)

    comps = []
    expanded = []
    for e in exprs:
        if (e[0] != '^' or e[-1] != '$'):
            return 'Infinity'
        num,member = count_expr(e[1:-1])
        if num == 'Infinity':
            return 'Infinity'
        comps.append(num)
        expanded.append(member)

    total = sum(comps)
    for i in range(2,len(expanded)+1):
        for subset in combos(expanded,i):
            if i % 2 == 0:
                total -= count_doubles(subset)
            else:
                total += count_doubles(subset)
    return total

def count_expr(expr):
    exprs = []
    level = 0
    current = ''
    for char in expr:
        if char in '([':
            level += 1
        if char in ')]':
            level -= 1
        if char == '|' and level == 0:
            exprs.append(current)
            current = ''
        else:
            current += char
    exprs.append(current)

    if len(exprs) != 1:
        comps = 0
        members = []
        sub = []
        for e in exprs:
            comp,memb = count_expr(e)
            if comp == 'Infinity':
                return 'Infinity',[]
            comps += comp
            members.append(('(',memb))
            sub.append(memb)

        for i in range(2,len(sub)+1):
            for subset in combos(sub,i):
                if i % 2 == 0:
                    comps -= count_doubles(subset)
                else:
                    comps += count_doubles(subset)

        return comps,[('|',members)]


    sub = []
    while expr != '':
        char = expr[0]
        if char in ['(','[','{']:
            level = 1
            i = 1
            while level > 0:
                nchar = expr[i]
                if nchar == char: level += 1
                if nchar == DELIMIT[char]: level -= 1
                i += 1
            sub.append((char,expr[1:i-1]))
            expr = expr[i:]
        elif char == '\\':
            if expr[1] == 'n':
                sub.append(('e','\\'+'\n'))
                expr = expr[2:]
            elif expr[1] == 't':
                sub.append(('e','\\'+'\t'))
                expr = expr[2:]
            else:
                sub.append(('e','\\'+expr[1]))
                expr = expr[2:]
        elif char in ['*','+','?']:
            sub.append(('{',char))
            expr = expr[1:]
        else:
            if char in '^$':
                return 0
            if char == '.':
                sub.append(('[','[:ascii:]'))
                expr = expr[1:]
            else:
                sub.append(('t',char))
                expr = expr[1:]

    components = []
    members = []
    for t,string in sub:
        if t == '(':
            comp,memb = count_expr(string)
            components.append(comp)
            members.append(('(',memb))
        elif t == 'e':
            if string in CLASSES:
                components.append(len(CLASSES[string]))
            else:
                components.append(1)
        elif t == '[':
            components.append(count_class(string))
        elif t == '{':
            num = components[-1]
            try:
                components[-1] = sum([num**r for r in count_quantifier(string)])
            except TypeError:
                return 'Infinity',[]
        elif t == 't':
            components.append(1)

        if t != '(':
            members.append((t,string))

    total = 1
    for c in components:
        if c == 'Infinity':
            return 'Infinity',[]
        total *= c
    return total,members

def count_class(string):
    if string in CLASSES:
        return len(CLASSES[string])
    if string[0] == '[' and string[-1] == ']':
        return 1
    negate = (string[0] == '^')
    if negate: string = string[1:]
    avail_count = 0
    while string != '':
        char = string[0]
        if len(string) >= 3:
            if string[1] == '-':
                first,last = string[0],string[2]
                avail_count += ord(last)-ord(first)+1
                string = string[3:]
                continue
        avail_count += 1
        string = string[1:]
    if negate:
        return 256-avail-count
    return avail_count

def count_quantifier(string):
    if string == '?':
        return [0,1]
    if string[0] in '*+':
        return None
    if ',' in string:
        low,high = string.split(',')
        if high == '':
            return None
        return range(int(low),int(high)+1)
    return [int(string)]

def bracket_string(string):
    negate = (string[0] == '^')
    if negate: string = string[1:]
    avail = ''
    while string != '':
        char = string[0]
        if len(string) >= 3:
            if string[1] == '-':
                first,last = string[0],string[2]
                for i in range(ord(first),ord(last)+1):
                    avail += chr(i)
                string = string[3:]
                continue
        avail += char
        string = string[1:]
    if negate:
        new = ''
        for c in CLASSES['[:ascii:]']:
            if c not in avail:
                new += c
        return new
    return avail

def overlap(es):
    chars=['' for i in range(len(es))]
    for i,e in enumerate(es):
        if e[0] == 'e':
            if any(e[1] not in CLASSES for e in es):
                chars[i] = e[1]
            else:
                chars[i] = CLASSES[e[1]]

    for i,e in enumerate(es):
        if e[0] == 't':
            chars[i] = e[1]

    for i,e in enumerate(es):
        if e[0] == '[':
            if e[1] in CLASSES:
                chars[i] = CLASSES[e[1]]
            else:
                chars[i] = bracket_string(e[1])

    total = 0
    for c in CLASSES['[:ascii:]']:
        has = True
        for chs in chars:
            if c not in chs:
                has = False
                break
        if has:
            total += 1
    return total

def count_doubles(exprs):   
    if all(e==[] for e in exprs):
        return 1

    for i,expr in enumerate(exprs):
        if len(expr) >= 2 and expr[1][0] == '{':
            rng = count_quantifier(expr[1][1])
            total = 0
            for n in rng:
                total += count_doubles([ [expr[0]]*n+expr[2:] ] + exprs[:i] + exprs[i+1:])
            return total

    for i,expr in enumerate(exprs):
        if len(expr) >= 1 and expr[0][0] == '(':
            return count_doubles([ expr[0][1]+expr[1:] ] + exprs[:i] + exprs[i+1:] )

    if any(e==[] for e in exprs):
        return 0

    for i,expr in enumerate(exprs):
        if expr[0][0] == '|':
            total = 0
            subs = []
            for sub_expr in expr[0][1]:
                subs.append([sub_expr])
                total += count_doubles([ [sub_expr]+expr[1:] ] + exprs[:i] + exprs[i+1:])

            for j in range(2,len(subs)+1):
                for subset in combos(subs,j):
                    if j % 2 == 0:
                        total -= count_doubles(subset)
                    else:
                        total += count_doubles(subset)

            return total

    over = overlap([e[0] for e in exprs])
    if over == 0:
        return 0
    return  over * count_doubles([e[1:] for e in exprs])

reg = raw_input('Regex: ')
print count_regex(reg)

Hier sind einige relevante Testfälle, die ich bestätigt habe:

Regex: ^[A-Za-z0-9]{8}$
218340105584896

Regex: ^([a-b]*)$
Infinity

Regex: ^([0-9]|[a-e])([a-e]|[A-E])$|^[a-f]{2}$
161

Regex: ^[a-z]{2}$|^[0-9]?[a-z]{2,3}$|^a[a-z]?$
200773

Regex: ^([a-z]|[a-e]|[A-E]|[3-4]|[D-G])$
35

Regex: ^\(.)\(.)\2\1$
16129*

Regex: ^(a)\1?$
2

Regex: ^[[:space:]]$|^\t$
3

* Dies ist bei Golfspielern und Nicht-Golfspielern tatsächlich anders, da es einen Unterschied von einem Zeichen bei den als gültig definierten Ascii gibt. Ich glaube, der Golfspieler ist der Richtigere.

Um zu bestätigen, dass weitere Tests durchgeführt werden konnten, teilen Sie mir bitte alle Fehler mit (ich wäre ehrlich gesagt nicht überrascht, wenn es nicht wenige gäbe).

KSab
quelle

Das ist doch verrückt .

Joe Z.

Eine positive Bewertung wert. xfix, meintest du das?

Tomsmeding

@ace Oh, danke, dass du die Syntax hervorgehoben hast. Ich bin relativ neu auf der Website und wusste nicht, dass Sie das tun können.

KSab

Bitte. Um die Syntaxhervorhebung hier zu verwenden, können Sie einen HTML-Kommentar (für ein Codeteil) oder (für alle Codes in einem Beitrag) hinzufügen , wobei lang-codeder Sprachcode Ihrer Sprache ist. Stellen Sie sicher, dass das Format identisch ist (mit Leerzeichen usw.). Die Liste der Sprachcodes finden Sie hier . (Sie müssen ein wenig nach unten scrollen.) Ich bin mir nicht sicher, ob die Verwendung von Tags funktioniert. Halten Sie sich also einfach an die Sprachcodes. :)

user12205

KSab ist gegangen, wohin sich niemand trauen würde ...

Rob

Bewerten Sie die Anzahl der möglichen Zeichenfolgen, mit denen ein regulärer Ausdruck übereinstimmen kann

Antworten:

Python 3370