From: Mark Wooding Date: Wed, 18 Sep 2024 18:57:55 +0000 (+0100) Subject: more stuff found lying about X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/wordchain/commitdiff_plain/3bbec421c59fe8feefefe89a5f33cf47ea4a08c8?ds=sidebyside more stuff found lying about --- diff --git a/.gitignore b/.gitignore index 51f1cd9..7cbc3fe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,17 @@ *.d *.o +*.rlib +/DATA.* /REF.* /massif.* +/summary.dat +/sumtab.tex /time.* /chain.c++-* +/chain.golang /chain.libavl-* +/chain.mLib-* /chain.qptrie-* +/chain.rust-* /chain.sgt-* /chain.xyla-* diff --git a/Makefile b/Makefile index d652279..b0d633a 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ $(RUSTLIB_OBJS): lib%.rlib: \ --crate-type=rlib --crate-name=$* $($*_RUSTFLAGS)) SRCEXT += go -GOBLD = go build +GOBUILD = go build TREELIBS = VPATH = @@ -166,6 +166,37 @@ mLib_VARIANTS = sym mLib_LIBS = -lmLib mLib-sym_CFLAGS = -DTREE=MLIB_SYM +c++_VARIANTS += map +c++-map_CXXFLAGS = -DTREE=CXX_MAP +c++_VARIANTS += uomap +c++-uomap_CXXFLAGS = -DTREE=CXX_UOMAP + +rust_VARIANTS += btree +rust-btree_RUSTFLAGS = --cfg 'feature="btree"' +rust_VARIANTS += hash +rust-hash_RUSTFLAGS = --cfg 'feature="hash"' + +ALL_VARIANTS += golang +TARGETS += chain.golang +CLEAN += chain.golang +chain.golang: chain.go + $(call v-tag,GOBUILD)$(GOBUILD) -o $@ $< + +ALL_VARIANTS += perl python + +lisp_VARIANTS = clisp cmucl ccl ecl sbcl +measure-lisp = \ + runlisp -L$1 chain.lisp -Ttime.$2.new $(DICT) >chain.$2.out +define def-lisp-variant +ALL_VARIANTS += lisp-$1 +lisp-$1_PROGRAM = chain.lisp +lisp-$1_MEASURE = $$(call measure-lisp,$1,$$*) +endef +$(foreach v,$(lisp_VARIANTS), \ + $(eval $(call def-lisp-variant,$v))) + +-include local.mk + define def-c-variant ALL_VARIANTS += $1-$2 TARGETS += chain.$1-$2 @@ -183,11 +214,6 @@ $(foreach t,$(TREELIBS), \ $(foreach v,$($t_VARIANTS), \ $(eval $(call def-c-variant,$t,$v)))) -c++_VARIANTS += map -c++-map_CXXFLAGS = -DTREE=CXX_MAP -c++_VARIANTS += uomap -c++-uomap_CXXFLAGS = -DTREE=CXX_UOMAP - define def-c++-variant ALL_VARIANTS += c++-$1 TARGETS += chain.c++-$1 @@ -203,11 +229,6 @@ endef $(foreach v,$(c++_VARIANTS), \ $(eval $(call def-c++-variant,$v))) -rust_VARIANTS += btree -rust-btree_RUSTFLAGS = --cfg 'feature="btree"' -rust_VARIANTS += hash -rust-hash_RUSTFLAGS = --cfg 'feature="hash"' - define def-rust-variant ALL_VARIANTS += rust-$1 TARGETS += chain.rust-$1 @@ -221,25 +242,6 @@ endef $(foreach v,$(rust_VARIANTS), \ $(eval $(call def-rust-variant,$v))) -ALL_VARIANTS += golang -TARGETS += chain.golang -CLEAN += chain.golang -chain.golang: chain.go - $(call v-tag,GOBLD)$(GOBLD) -o $@ $< - -ALL_VARIANTS += perl python - -lisp_VARIANTS = cmucl ccl ecl sbcl -measure-lisp = \ - runlisp -L$1 chain.lisp -Ttime.$2.new $(DICT) >chain.$2.out -define def-lisp-variant -ALL_VARIANTS += lisp-$1 -lisp-$1_PROGRAM = chain.lisp -lisp-$1_MEASURE = $$(call measure-lisp,$1,$$*) -endef -$(foreach v,$(lisp_VARIANTS), \ - $(eval $(call def-lisp-variant,$v))) - CLEAN += ref ref: $(DICT) $(call v-tag,GEN)./chain.perl $(DICT) >$@.new && mv $@.new $@ @@ -259,6 +261,10 @@ $(MEASURES): measure/%: $$(or $$($$*_PROGRAM),chain.$$*) ref warm-cache $(call v-tag,MEASURE)$(or $($*_MEASURE),$(COMMON_MEASURE)) && \ ./chkref chain.$*.out ref && mv time.$*.new time.$* +list: + for i in $(ALL_VARIANTS); do echo $$i; done +.PHONY: list + .PHONY: measure $(MEASURES) all: $(TARGETS) diff --git a/chain.c b/chain.c index 583b2a0..707097b 100644 --- a/chain.c +++ b/chain.c @@ -230,7 +230,7 @@ static int word_nav(struct xyla_bst_nodecls *cls, # endif # define INSERT do { \ if (TREE_PROBE(0, &root, word_nav, &word, &path)) { \ - printf(";; duplicate `%s'\n", buf); \ + fprintf(stderr, ";; duplicate `%s'\n", buf); \ free(node); node = 0; \ } else { \ EXTRA_NODE_SETUP; \ @@ -363,9 +363,10 @@ static bool my_Tnextl(Tbl *t, const char **k_inout, size_t *sz_inout, # endif # elif QPTRIE_ITER == QPITER_LIST # define DECL_ITER \ - struct node *list = 0, **tail = &list, *next -# define INSERT_LIST \ - do { node->next = 0; *tail = node; tail = &node->next; } while (0) + struct node *all = 0, **all_tail = &all +# define INSERT_LIST do { \ + node->next = 0; *all_tail = node; all_tail = &node->next; \ + } while (0) # endif # define DECLS \ @@ -381,7 +382,7 @@ static bool my_Tnextl(Tbl *t, const char **k_inout, size_t *sz_inout, # if QPTRIE_ITER == QPITER_LIST # define ITERATE \ - for (next = list; node = next, node; next = next->next) + for (next = all; node = next, node; next = next->next) # else # define ITERATE \ for (cur.p = 0, cur.n = 0; \ @@ -399,7 +400,7 @@ static bool my_Tnextl(Tbl *t, const char **k_inout, size_t *sz_inout, # if QPTRIE_ITER == QPITER_LIST # define FREE do { \ - for (node = list; node; node = next) { \ + for (node = all; node; node = next) { \ tbl = Tsetl(tbl, node->w.p, node->w.n, 0); \ next = node->next; free(node); \ } \ @@ -429,7 +430,7 @@ static int node_cmp(void *a, void *b) # define INSERT do { \ if (add234(root, node) != node) { \ - printf(";; duplicate `%s'\n", buf); \ + fprintf(stderr, ";; duplicate `%s'\n", buf); \ free(node); node = 0; \ } \ } while (0) @@ -467,7 +468,7 @@ static void free_node(void *n, void *arg) { free(n); } # define INSERT do { \ p = TREE_PROBE(tab, node); if (!p) bail("out of memory"); \ if (*p != node) { \ - printf(";; duplicate `%s'\n", buf); \ + fprintf(stderr, ";; duplicate `%s'\n", buf); \ free(node); node = 0; \ } \ } while (0) @@ -487,11 +488,11 @@ static void free_node(void *n, void *arg) { free(n); } # define DECLS \ sym_table tab; \ - struct node *list = 0, **tail = &list, *next; \ + struct node *all = 0, **all_tail = &all; \ unsigned foundp # define INIT do { \ - unihash_setkey(&unihash_global, /*getpid()*/ 31022); \ + unihash_setkey(&unihash_global, getpid()); \ sym_create(&tab); \ } while (0) @@ -499,11 +500,15 @@ static void free_node(void *n, void *arg) { free(n); } # define INSERT do { \ node = sym_find(&tab, buf, word.n, sizeof(*node), &foundp); \ - if (foundp) { bail(";; duplicate `%s'\n", buf); node = 0; } \ - else { node->next = 0; *tail = node; tail = &node->next; } \ + if (!foundp) { \ + node->next = 0; *all_tail = node; all_tail = &node->next; \ + } else { \ + fprintf(stderr, ";; duplicate `%s'\n", buf); \ + free(node); node = 0; \ + } \ } while (0) -# define ITERATE for (next = list; node = next, node; next = next->next) +# define ITERATE for (next = all; node = next, node; next = next->next) # define FOCUS do ; while (0) @@ -562,7 +567,7 @@ static void print_chain(struct node *node) int main(void) { DECLS; - struct node *node, *parent; + struct node *node, *parent, *winners, **winners_tail, *next; struct word word; char buf[WORDMAX]; int ch, max, nlen, plen; @@ -582,20 +587,28 @@ int main(void) CHECK; - max = 0; + max = 0; winners_tail = &winners; ITERATE { FOCUS; /* fprintf(stderr, ";; ponder `%.*s'\n", WORDLEN(node), WORDPTR(node)); */ if (WORDLEN(node) <= 1) - parent = 0; + continue; else { GETPREFIX; /* fprintf(stderr, ";; search `%.*s'\n", word.n, word.p); */ - parent = LOOKUP; + parent = LOOKUP; if (!parent) continue; } node->up = parent; nlen = node->len; - while (parent) { + for (;;) { + if (!parent) { + if (nlen >= max) { + if (nlen > max) + { max = nlen; *winners_tail = 0; winners_tail = &winners; } + *winners_tail = node; winners_tail = &node->right; + } + break; + } plen = parent->len; nlen++; if (plen > nlen) break; @@ -608,10 +621,12 @@ int main(void) node = parent; parent = node->up; } } - if (nlen > max) max = nlen; } - ITERATE if (node->len == max) { print_chain(node); putchar('\n'); } + for (*winners_tail = 0, node = winners; node; node = next) { + next = node->right; node->right = 0; + print_chain(node); putchar('\n'); + } FREE; return (0); } diff --git a/chain.cc b/chain.cc index a21dc89..f0ed1ce 100644 --- a/chain.cc +++ b/chain.cc @@ -90,6 +90,7 @@ int main(void) } } + WordNode *winners, **winners_tail = &winners; int max = 0; for (auto p = map.begin(); p != map.end(); ++p) { @@ -99,16 +100,25 @@ int main(void) const std::string *word = &wnode->first; Node *node = &wnode->second; if (word->length() <= 1) - pwnode = 0; + continue; else { /* std::fprintf(stderr, ";; search `%.*s'\n", w->first.length(), w->first.c_str()); */ std::string prefix{*word, 0, word->length() - 1}; - auto p = map.find(prefix); pwnode = p == map.end() ? 0 : &*p; + auto p = map.find(prefix); if (p == map.end()) continue; + pwnode = &*p; } int nlen = node->len; node->up = pwnode; - while (pwnode) { + for (;;) { + if (!pwnode) { + if (nlen >= max) { + if (nlen > max) + { max = nlen; *winners_tail = 0; winners_tail = &winners; } + *winners_tail = wnode; winners_tail = &node->right; + } + break; + } Node *parent = &pwnode->second; nlen++; int plen = parent->len; if (plen > nlen) @@ -122,10 +132,12 @@ int main(void) wnode = pwnode; node = &wnode->second; pwnode = node->up; } } - if (nlen > max) max = nlen; } - for (auto p = map.begin(); p != map.end(); ++p) - if (p->second.len == max) { print_chain(&*p); std::putchar('\n'); } + *winners_tail = 0; + for (WordNode *next, *wnode = winners; wnode; wnode = next) { + next = wnode->second.right; wnode->second.right = 0; + print_chain(wnode); std::putchar('\n'); + } return (0); } diff --git a/chain.go b/chain.go index 8f1b450..2d6d2c2 100644 --- a/chain.go +++ b/chain.go @@ -43,29 +43,44 @@ func main() { } max := 0 + var winners *wordnode = nil; winners_tail := &winners for word, node := range words { wlen := len(word) if wlen <= 1 { continue } parent := words[word[:wlen - 1]] + if parent == nil { continue } node.up = parent - for parent != nil { - nlen := node.len + 1 - plen := parent.len - if nlen < plen { + nlen := node.len + for { + if parent == nil { + if nlen >= max { + if nlen > max { + max = nlen + *winners_tail = nil + winners_tail = &winners + } + *winners_tail = node + winners_tail = &node.right + } break - } else if nlen > plen { - parent.down = nil - parent.len = nlen - if nlen > max { max = nlen } } - if parent.down != node { + nlen += 1; plen := parent.len + if plen > nlen { + break + } else if plen == nlen { node.right = parent.down; parent.down = node + break } + parent.down = node; node.right = nil + parent.len = nlen node = parent; parent = node.up } } - for _, node := range words { - if node.len == max { print_chain(node); fmt.Printf("\n") } + *winners_tail = nil + var next *wordnode + for node := winners; node != nil; node = next { + next = node.right; node.right = nil + print_chain(node); fmt.Printf("\n") } } diff --git a/chain.lisp b/chain.lisp index 214091c..1f0abaa 100755 --- a/chain.lisp +++ b/chain.lisp @@ -12,7 +12,7 @@ (defun word-chain (stream) (declare (optimize speed)) (let ((map (make-hash-table :test #'equal)) - (max 0)) + (max 0) (winners nil)) (loop (let ((line (read-line stream nil))) @@ -29,61 +29,62 @@ (defun word-chain (stream) (gethash (subseq word 0 (1- len)) map)))) (nlen (node-len node))) - (setf (node-up node) parent) - (loop - (unless parent (return)) - (incf nlen) - (let ((plen (node-len parent))) - ;;(format t ";; node `~A' ~D parent `~A' ~D~%" - ;; (node-word node) (1- nlen) - ;; (node-word parent) plen) - (cond ((> plen nlen) - ;;(format t ";; longer chain through `~A'~%" - ;; (node-word (node-down parent))) - (return)) - ((= plen nlen) - (setf (node-right node) (node-down parent) - (node-down parent) node) - (return)) - (t - ;;(format t ";; new longest chain ~A > ~A~%" - ;; nlen plen) - (setf (node-down parent) node - (node-right node) nil - (node-len parent) nlen - node parent - parent (node-up node)))))) - (when (> nlen max) (setf max nlen)))) + (when parent + (setf (node-up node) parent) + (loop + (unless parent + (when (>= nlen max) + (when (> nlen max) + (setf max nlen + winners nil)) + (push node winners)) + (return)) + (incf nlen) + (let ((plen (node-len parent))) + ;;(format t ";; node `~A' ~D parent `~A' ~D~%" + ;; (node-word node) (1- nlen) + ;; (node-word parent) plen) + (cond ((> plen nlen) + ;;(format t ";; longer chain through `~A'~%" + ;; (node-word (node-down parent))) + (return)) + ((= plen nlen) + (setf (node-right node) (node-down parent) + (node-down parent) node) + (return)) + (t + ;;(format t ";; new longest chain ~A > ~A~%" + ;; nlen plen) + (setf (node-down parent) node + (node-right node) nil + (node-len parent) nlen + node parent + parent (node-up node))))))))) map) - (maphash (lambda (word node) - (declare (ignore word) - (type node node)) - - (when (= (node-len node) max) - (labels ((print-chain (node) - (cond ((null (node-right node)) - (write-string (node-word node)) - (let ((down (node-down node))) - (when down - (write-char #\space) - (print-chain down)))) - (t - (write-string "{ ") - (loop - (write-string (node-word node)) - (let ((down (node-down node))) - (when down - (write-char #\space) - (print-chain down))) - (let ((right (node-right node))) - (unless right (return)) - (write-string " | ") - (setf node right))) - (write-string " }"))))) - (print-chain node) - (terpri)))) - map))) + (labels ((print-chain (node) + (cond ((null (node-right node)) + (write-string (node-word node)) + (let ((down (node-down node))) + (when down + (write-char #\space) + (print-chain down)))) + (t + (write-string "{ ") + (loop + (write-string (node-word node)) + (let ((down (node-down node))) + (when down + (write-char #\space) + (print-chain down))) + (let ((right (node-right node))) + (unless right (return)) + (write-string " | ") + (setf node right))) + (write-string " }"))))) + (dolist (node winners) + (print-chain node) + (terpri))))) #+runlisp-script (let ((args (uiop:command-line-arguments)) diff --git a/chain.perl b/chain.perl index 06a2d39..7e52860 100755 --- a/chain.perl +++ b/chain.perl @@ -11,13 +11,22 @@ while (<>) { $WORD{$_} = [$_, 1, undef, undef, undef]; } -my $MAX = 0; +my $MAX = 0; my @WINNERS; WORD: while (my ($word, $node) = each %WORD) { my $len = length $word; - my $parent = $len <= 1 ? undef : $WORD{substr $word, 0, $len - 1}; + next WORD unless $len >= 2; + my $parent = $WORD{substr $word, 0, $len - 1}; + next WORD unless defined $parent; $node->[UP] = $parent; my $nlen = $node->[LEN]; - UP: while (defined $parent) { + UP: for (;;) { + unless (defined $parent) { + if ($nlen >= $MAX) { + if ($nlen > $MAX) { $MAX = $nlen; @WINNERS = (); } + push @WINNERS, $node; + } + last UP; + } my $plen = $parent->[LEN]; $nlen++; if ($plen > $nlen) { last UP; } @@ -51,5 +60,4 @@ sub print_chain ($) { } } -for my $node (values %WORD) - { if ($node->[LEN] == $MAX) { print_chain $node; print "\n"; } } +for my $node (@WINNERS) { print_chain $node; print "\n"; } diff --git a/chain.python b/chain.python index 54f8fac..b02f475 100755 --- a/chain.python +++ b/chain.python @@ -15,13 +15,19 @@ for line in SYS.stdin: line = line.rstrip("\n") WORDS[line] = Node(line) -MAX = 0 +MAX = 0; WINNERS = [] for node in WORDS.values(): word = node.word; wlen = len(word) - if wlen <= 1: parent = None - else: parent = WORDS.get(word[:wlen -1]) + if wlen <= 1: continue + parent = WORDS.get(word[:wlen - 1]) + if not parent: continue node.up = parent; nlen = node.len - while parent is not None: + while True: + if parent is None: + if nlen >= MAX: + if nlen > MAX: MAX = nlen; WINNERS = [] + WINNERS.append(node) + break plen = parent.len; nlen += 1 if plen > nlen: break @@ -50,5 +56,4 @@ def print_chain(node): SYS.stdout.write(" | ") SYS.stdout.write(" }") -for node in WORDS.values(): - if node.len == MAX: print_chain(node); SYS.stdout.write("\n") +for node in WINNERS: print_chain(node); SYS.stdout.write("\n") diff --git a/chain.rs b/chain.rs index a75b2a6..7d93487 100644 --- a/chain.rs +++ b/chain.rs @@ -72,7 +72,7 @@ fn main() -> io::Result<()> { } } - let mut max = 0; + let mut max = 0; let mut winners = Vec::with_capacity(8); for node in map.values() { //stdout.write_all(b";; ponder `")?; //stdout.write_all(node.word)?; @@ -80,32 +80,44 @@ fn main() -> io::Result<()> { let mut node: &Node = node; let mut parent; let n = node.word.len(); - if n <= 1 { parent = None; } - else { parent = map.get(&node.word[0 .. n - 1]).map(|n| &**n); } + if n <= 1 { continue; } + parent = map.get(&node.word[0 .. n - 1]).map(|n| &**n); + if let None = parent { continue; } node.up.set(parent); let mut nlen = node.len.get(); - while let Some(parent_node) = parent { - let plen = parent_node.len.get(); nlen += 1; - if plen > nlen - { break; } - else if plen == nlen { - node.right.set(parent_node.down.get()); - parent_node.down.set(Some(node)); - break; - } else { - parent_node.down.set(Some(node)); - node.right.set(None); - parent_node.len.set(nlen); - node = parent_node; parent = node.up.get(); + loop { + match parent { + None => { + if nlen >= max { + if nlen > max { + max = nlen; + winners.clear(); + } + winners.push(node); + } + break; + } + Some(parent_node) => { + let plen = parent_node.len.get(); nlen += 1; + if plen > nlen + { break; } + else if plen == nlen { + node.right.set(parent_node.down.get()); + parent_node.down.set(Some(node)); + break; + } else { + parent_node.down.set(Some(node)); + node.right.set(None); + parent_node.len.set(nlen); + node = parent_node; parent = node.up.get(); + } + } } } - if nlen > max { max = nlen; } } - for node in map.values() { - if node.len.get() == max - { print_chain(node, &mut stdout)?; stdout.write(b"\n")?; } - } + for node in winners + { print_chain(node, &mut stdout)?; stdout.write(b"\n")?; } Ok(()) } diff --git a/chkref b/chkref index 4076d96..ef7ed9e 100755 --- a/chkref +++ b/chkref @@ -39,7 +39,7 @@ sub parse_list ($) { push @chains, parse_chain(@words); } $f->close; - return join("|", @chains); + return join("|", sort { $a cmp $b } @chains); } die "usage: $0 A B" unless @ARGV == 2;