Server : Apache System : Linux indy02.toastserver.com 3.10.0-962.3.2.lve1.5.85.el7.x86_64 #1 SMP Thu Apr 18 15:18:36 UTC 2024 x86_64 User : palandch ( 1163) PHP Version : 7.1.33 Disable Function : NONE Directory : /opt/cloudlinux/venv/lib64/python3.11/site-packages/guppy/etc/ |
""" Support functions for RE simplification. This module is intended for use by the RE module. It is in a separate module to keep RE itself cleaner since the algorithm, 'rectangle selection', is a quite separate part that depends on some tricky heuristics. The primary entry function is chooserects(lines, gauges) It chooses 'the best' rectangles from lines to base simplification on. A weight on atoms is given by gauges. pr() gives some example usages of chooserects. """ from guppy.sets import immbitset, mutbitset, immbitrange class Rect(object): __slots__ = 'width', 'lines', 'gainmemo', 'lnos', 'all_lines', 'common_part' def __init__(self, width, lines): self.width = width self.lines = lines assert not (width and len(lines) == 1) self.gainmemo = {} def init2(self, lnobyid, all_lines): self.all_lines = all_lines self.lnos = immbitset([lnobyid[id(line)] for line in self.lines]) self.common_part = self.get_common_part() def reducelines(self, lnos): # Reduce lines of self by removing some lines # Argument: lnos, a 'set' of line numbers to remove olnos = self.lnos lnos = olnos & ~lnos if lnos != olnos: self.lnos = lnos self.lines = [self.all_lines[lno] for lno in lnos] if len(lnos) == 1: self.width = len(self.lines[0]) def get_lines(self, pickednos=0): lines = [] for i in self.lnos & ~ pickednos: lines.append(self.all_lines[i]) return lines def __repr__(self): return '<\n dir = %d\n width = %d\n lnos = %s\n lines = %s\n>' % ( self.dir, self.width, list(self.lnos), self.lines) class LeftRect(Rect): __slots__ = () dir = 0 def get_common_part(self): return self.lines[0][:self.width] def get_uncommons(self, pickednos=0): uc = [] for line in self.get_lines(pickednos): uc.append(line[self.width:]) return uc class RightRect(Rect, object): __slots__ = () dir = -1 def get_common_part(self): lo = -self.width if lo == 0: return [] return self.lines[0][lo:] def get_uncommons(self, pickednos=0): uc = [] hi = -self.width if hi == 0: hi = None for line in self.get_lines(pickednos): uc.append(line[:hi]) return uc def sum_gauge(gauge, lst): global hits, misses if gauge is None: return len(lst) else: gain = 0 for x in lst: gain += gauge(x) return gain def cmp_gauged(xs, ys, gauges): for gauge in gauges: gx = sum_gauge(gauge, xs) gy = sum_gauge(gauge, ys) c = cmp(gx, gy) if c: return c return 0 class InducedRect: def __init__(self, s, lines, lnos): self.s = s self.width = s.width self.all_lines = s.all_lines self.lines = lines self.lnos = lnos class InducedRightRect(InducedRect, RightRect): pass class InducedLeftRect(InducedRect, LeftRect): pass def brect(lines): if len(lines) <= 1: return [LeftRect(0, lines)] newrects = [LeftRect(0, lines), RightRect(0, lines)] donerects = [] while newrects: oldrects = newrects newrects = [] for r in oldrects: width = r.width while 1: is_done = 0 d = {} pos = width ^ r.dir for line in r.lines: if width < len(line): d.setdefault(line[pos], []).append(line) else: is_done = 1 if is_done or len(d) != 1: break r.width = width = width + 1 donerects.append(r) width += 1 for k, v in list(d.items()): if len(v) > 1: new_r = r.__class__(width, v) newrects.append(new_r) return donerects def choose(rects, lines=[], gauges=[None]): def induce(r): uncommons = r.get_uncommons() if len(uncommons) < 2: return irs = [] for s in rects: if s.dir != r.dir: continue pss = [] uncs = s.get_uncommons(pickednos) lnos = s.lnos & ~pickednos assert len(uncs) == len(lnos) for unc, lno in zip(uncs, lnos): if unc in uncommons: pss.append(lno) if len(pss) == len(uncommons): pslnos = immbitset(pss) pss = [lines[lno] for lno in pss] if s.dir == -1: c = InducedRightRect else: c = InducedLeftRect ir = c(s, pss, pslnos) irs.append(ir) if irs: news.extend(irs) def overlap(r): rlnos = r.lnos tonews = [] for s in rects: if s is r: continue if s.dir != r.dir: continue slnos = s.lnos if not (slnos & rlnos): continue slnos &= ~ pickednos if not slnos: # remove continue scom = s.common_part if not scom: continue for t in rects: if t is s: continue if t.dir == s.dir: continue tlnos = t.lnos & ~pickednos if (tlnos & rlnos): continue olnos = tlnos & slnos if not olnos: continue if slnos == tlnos: continue tcom = t.common_part if not tcom: continue c = cmp_gauged(scom, tcom, gauges) if c > 0: continue break else: # s is ok tonews.append(s) rects.remove(s) news.extend(tonews) def picknext(): while 1: if news: r = news[0] del news[0] else: r = None for s in list(rects): slnos = s.lnos - pickednos if not slnos: rects.remove(s) continue sn = len(slnos) - 1 sw = s.width if r is not None: if not sw: break if not sn: continue if rwn: rmemo = r.gainmemo smemo = s.gainmemo c = 0 for gauge in gauges: try: gr = rmemo[gauge] except KeyError: gr = sum_gauge(gauge, r.common_part) rmemo[gauge] = gr gr *= rn try: gs = smemo[gauge] except KeyError: gs = sum_gauge(gauge, s.common_part) smemo[gauge] = gs gs *= sn c = gr - gs if c: break if c >= 0: continue r = s rlnos = slnos if not sw: break rn = sn rw = sw rwn = sn * sw if r is not None: rects.remove(r) if r is not None: r.reducelines(pickednos) if r.lnos: return r def cmpinit(x): return x.width, x.dir, x.lnos[0] if gauges[0] is None: gauges = gauges[1:] lnobyid = dict([(id(line), i) for i, line in enumerate(lines)]) orects = rects rects = list(orects) for r in rects: r.init2(lnobyid, lines) rects.sort(key=cmpinit, reverse=True) allnos = immbitrange(len(lines)) pickednos = mutbitset() pickedrects = [] news = [] while pickednos != allnos: r = picknext() pickednos |= r.lnos pickedrects.append(r) induce(r) overlap(r) return pickedrects def chooserects(lines, gauges=[None]): rects = brect(lines) choosen = choose(rects, lines, gauges) return choosen def pr(): x = chooserects(['abc', 'ade']) x = chooserects(['abc', 'abe', 'ace', 'xby']) print(x) x = chooserects(['ab1', 'ab2', 'ac3', 'ac4']) print(x) # Case where.. (from right).. : # the total gain of two rects (bfbf+cfcf = 4) is greater than the gain of another # overlapping rect (ffff == 3), although the individual gains are less (= 2). # But in this case at least, the end result should likely become the same x = chooserects([ '1bf', '2bf', '3cf', '4cf']) print(x) # Case where it chooses twice.. x = chooserects([ 'abc', 'abd', 'bcx', 'bdy' ]) print('TW', x) # Case where it didn't choose enough rects x = ([ 'abc', 'abd', 'e' ]) print(chooserects(x)) # Case where it should prefer one side or the other # i.e. left traditionally print(chooserects(['abc', 'axc'])) # Case where it should give a width 0 rect print(chooserects(['a', ''])) # Case with overlap print(chooserects(['abcd', 'abce', 'a', 'f'])) # Case with induce print(chooserects(['abcd', 'abce', 'd', 'e'])) print(chooserects(['auvw', 'buvw', 'a', 'b'])) print(chooserects(['axuvw', 'bxuvw', 'axy', 'bxy', 'cy'])) def tmany(): for i in range(100): x = chooserects(['abc', 'abe', 'ace', 'xby'])