implement Sort; include "sys.m"; sys: Sys; include "bufio.m"; bufio: Bufio; Iobuf: import bufio; include "draw.m"; include "arg.m"; include "exception.m"; include "rand.m"; rand: Rand; Sort: module { init: fn(nil: ref Draw->Context, args: list of string); }; MemLines: con 50000; # the maximum number of lines held in memory TooManyTemps: con 20; # begin coalescing tempfiles after this many have been created stderr: ref Sys->FD; cmp: ref fn(nil, nil: string): int; rflag := 0; init(nil : ref Draw->Context, args : list of string) { bio : ref Bufio->Iobuf; sys = load Sys Sys->PATH; stderr = sys->fildes(2); bufio = load Bufio Bufio->PATH; if (bufio == nil) { sys->fprint(stderr, "sort: cannot load %s: %r\n", Bufio->PATH); raise "fail:bad module"; } arg := load Arg Arg->PATH; if (arg == nil) { sys->fprint(stderr, "sort: cannot load %s: %r\n", Arg->PATH); raise "fail:bad module"; } sys->pctl(Sys->NEWPGRP, nil); exc := load Exception Exception->PATH; if(exc->setexcmode(Exception->PROPAGATE) < 0) { sys->fprint(stderr, "setexcmode: %r\n"); raise "fail:setexcmode"; } exc = nil; cmp = lcmp; arg->init(args); arg->setusage("usage: sort [-n] [file]"); while ((opt := arg->opt()) != 0) { case opt { 'n' => cmp = ncmp; 'r' => rflag = 1; * => arg->usage(); } } args = arg->argv(); if (len args > 1) arg->usage(); if (args != nil) { bio = bufio->open(hd args, Bufio->OREAD); if (bio == nil) { sys->fprint(stderr, "sort: cannot open %s: %r\n", hd args); raise "fail:open file"; } } else bio = bufio->fopen(sys->fildes(0), Bufio->OREAD); a := array[MemLines] of string; tmps: list of list of ref Iobuf = nil :: nil; eof := 0; for(;;) { for(i := 0; i < len a; i++) { if(eof = ((a[i] = bio.gets('\n')) == nil)) break; } mergesort(a, array[i] of string, i); if(eof) { a = a[:i]; break; } tmps = coalesce(tmps); tmp := tempfile(); for(j := 0; j < i; j++) tmp.puts(a[j]); tmp.flush(); # this shouldn't be necessary tmps = (tmp :: hd tmps) :: tl tmps; #sys->fprint(stderr, "len tmps %d %d\n", len tmps, len flatten(tmps)); } stdout := bufio->fopen(sys->fildes(1), Bufio->OWRITE); merge(stdout, flatten(tmps), a); stdout.close(); } mergesort(a, b: array of string, r: int) { if(r <= 1) return; m := (r-1)/2 + 1; mergesort(a[0:m], b[0:m], m); mergesort(a[m:r], b[m:r], r-m); b[0:] = a[0:r]; for ((i, j, k) := (0, m, 0); i < m && j < r; k++) { if(cmp(b[i], b[j]) > 0) a[k] = b[j++]; else a[k] = b[i++]; } if (i < m) a[k:] = b[i:m]; else if (j < r) a[k:] = b[j:r]; } lcmp(a, b: string): int { ret: int; # if(a == b) # return 0; if(a < b) ret = -1; else ret = 1; if(rflag) ret = -ret; return ret; } ncmp(a, b: string): int { ret: int; (x, y) := (int a, int b); # if(x == y) # return 0; if(x < y) ret = -1; else ret = 1; if(rflag) ret = -ret; return ret; } randstr(n: int): string { if(rand == nil) { rand = load Rand Rand->PATH; rand->init(sys->pctl(0, nil)); } s: string; for(i := 0; i < n; i++) s[len s] = rand->rand('Z' - 'A') + 'A'; return s; } tempfile(): ref Bufio->Iobuf { for(i := 0; i < 10; i++) { name := sys->sprint("/tmp/sort.%d.%s", sys->pctl(0, nil), randstr(8)); if((fd := sys->create(name, Sys->ORDWR|Sys->ORCLOSE|Sys->OEXCL, 8r600)) != nil) return bufio->fopen(fd, Bufio->ORDWR); } sys->fprint(stderr, "sort: create tempfile: %r\n"); raise "fail:tempfile"; } coalesce(tmps: list of list of ref Iobuf): list of list of ref Iobuf { if(tmps == nil || len (tmps1 := hd tmps) < TooManyTemps) return tmps; merge(tmp2 := tempfile(), tmps1, nil); if((tmps = tl tmps) == nil) return nil :: (tmp2 :: nil) :: nil; return nil :: coalesce((tmp2 :: hd tmps) :: tl tmps); } diskreader(b: ref Iobuf, c: chan of string) { while((s := b.gets('\n')) != nil) c <-= s; c <-= nil; } memreader(a: array of string, c: chan of string) { for(i := 0; i < len a; i++) c <-= a[i]; c <-= nil; } mkreaders(tmps: list of ref Iobuf, mem: array of string): list of chan of string { rs: list of chan of string; for(; tmps != nil; tmps = tl tmps) { t := hd tmps; if(t.seek(big 0, Bufio->SEEKSTART) < big 0) { sys->fprint(stderr, "sort: seek tempfile: %r\n"); raise "fail:seek"; } r := chan of string; spawn diskreader(t, r); rs = r :: rs; } if(mem != nil) { r := chan of string; spawn memreader(mem, r); rs = r :: rs; } return rs; } mkmergertree(cs: list of chan of string): chan of string { if(len cs == 1) return hd cs; ds: list of chan of string; for(; len cs >= 2; cs = tl tl cs) { d := chan of string; spawn merger(hd cs, hd tl cs, d); ds = d :: ds; } if(len cs == 1) ds = hd cs :: ds; return mkmergertree(ds); } merger(a, b: chan of string, c: chan of string) { aa := <-a; bb := <-b; while(aa != nil && bb != nil) { if(cmp(aa, bb) < 0) { c <-= aa; aa = <-a; } else { c <-= bb; bb = <-b; } } if(aa == nil) { aa = bb; a = b; } c <-= aa; while((c <-= <-a) != nil) ; } merge(out: ref Iobuf, ins: list of ref Iobuf, mem: array of string) { c := mkmergertree(mkreaders(ins, mem)); while((s := <-c) != nil) out.puts(s); out.flush(); # this shouldn't be necessary } flatten(xs: list of list of ref Iobuf): list of ref Iobuf { ys: list of ref Iobuf; for(; xs != nil; xs = tl xs) { for(zs := hd xs; zs != nil; zs = tl zs) ys = hd zs :: ys; } return ys; }