DB_btree_wdix.stl
"B-tree variant for word index, with 2 cumulants (no. of occs, last wd. in parent), refcounts."
-- file B_tree_for_bigs_wdix.stl package B_tree_for_wdix; -- B-trees, realized as objects -- *************************************************************************************************************** -- ****** B-tree variant for word index, with 2 cumulants (no. of occs, last wd. in parent), refcounts ****** -- *************************************************************************************************************** -- Refcounts can be ignored except in the wix_set_comp, wix_insert, split_node, share_right, join_left, -- and join_left routines. -- This B-tree represents the words-to-number-of-occurences index -- Given an indexable word associated with a database, this tree indexes to the start, in the word-occurences -- bigstring, of the corresponding occurences list. It stores (as cumuants) the cumulated length -- of all such occurences lists, and the first 10 or 11 characters of each indexed word. -- To deal with indexed words larger than this, we use a special 'self-defining' encoding, keep the first part -- of each excessivley long word in the tree, and store the remainder of the word (which can be of any -- length up to 32K characters) ast the start of the corresponding occurences section. -- (For compatibility with existing code, a zone whose length is a multiple of 4 bytes is used for this.) -- The self-defining encoding used is as follows. Words of up to 128 bytes are padded with zero bytes -- to at least 11 bytes (but if longer than 11 bytes, always to one less than a multiple of 4 bytes), -- and are preceded by an extra byte giving the string length. Words of more than 128 bytes -- (but of no more than 32K bytes) are preceded by two bytes giving their string length (the first byte being -- incremented by 128 to distinguish it from the preceding case), and padded with zero bytes to 2 less than -- a multiple of 4 bytes. -- This index stores words in the alphabetical order of these encoded forms. So to search for a word W -- of up to 11 bytes, we use its encoded form to search as usual; when the first larger word FLW -- has been found we know immediately from the first 12 bytes of FLW whether it equals W. We search -- for words of more than 11 words in the same way. However, when the first potentially larger word is found -- (examining the lead 12 bytes only) we may have to examine its additional bytes (stored at the start -- of the corresponding occurences section) to determine whether this is indeed larger, and if -- not my need to conduct a binary search among words with the same 12 lead bytes to find the first largest. -- However, such coincidences of word length and first 10 actual bytes should be infrequent. -- Obviously the treatment of such longer words is substantially less efficient than that of words of up to -- 11 bytes in length. To avoid searches on longer words, the database fructions that transform of records -- into lists of words should be designed to compress words where possible, e.g. by compressing integers, -- and by using auxiliary encoding dictionaries to replace words by word numbers. const wix_code_pts := {"wix_last1", "wix_last2", "wix_nlast2", "wix_nlast1", "wix_nc1", "wix_nc2", "wix_set_copy", "wix_last1", "wix_last2", "wix_nc1", "wix_comp1", "wix_ndel_nc", "wix_ndel_comp", "wix_ndel_comp_last", "wix_ndel_comp_nlast", "wix_del_nc", "wix_del_nc_have", "wix_del_comp", "wix_del_comp_last", "wix_del_comp_nlast", "wix_del_comp_enough", "wix_del_comp_canpull", "wix_del_comp_canjoin", "wix_del_comp_npop", "wix_del_pop", "wix_in_copy", "wix_in_nend2", "wix_in_nend1", "wix_in_nend3", "wix_in_end", "wix_in_end_nc", "wix_in_end_nc_nos", "wix_in_end_nc_split", "wix_in_end_comp", "wix_in_end_comp_nos", "wix_in_end_comp_split", "wix_in_end_comp_nosthis", "wix_in_end_comp_splitthis", "wix_in_nend_nc", "wix_in_nend_nc_nos", "wix_in_nend_nc_split", "wix_in_nend_comp", "wix_in_nend_comp_nos", "wix_in_nend_comp_split", "wix_in_nend_comp_nosthis", "wix_in_nend_comp_splitthis", "wix_halves_nc", "wix_halves_comp", "wix_pull_left", "wix_pull_right", "wix_share_nc", "wix_share_comp", "wix_share_copy", "wix_share_copy2", "wix_share_move_left", "wix_share_move_right", "wix_join_left_comp", "wix_join_left_nc", "wix_join_left_copy", "wix_join_right_comp", "wix_join_right_nc", "wix_join_right_copy"}; -- code points to be traversed var debug_val := 0; var prior_debug_c := 0,debug_c := 0; -- global variables for debugging var tree_level; -- tree level for recursive insertion process procedure wix_comp(rec,j); -- fetch of component containing cumulant j procedure wix_comp2(rec,j); -- fetch of component containing second cumulant j procedure wix_comp_cum(rec,x); -- fetch of component containing cumulant x, with cumulants procedure wix_comp_cum2(rec,x); -- fetch of component containing second cumulant x, with cumulants procedure wix_set_comp(rw rec,w,x); -- assignment of the first component whose cumulant is at least w; w must be in range procedure wix_set_comp2(rw rec,w,x); -- assignment of the first component whose second cumulant is at least w procedure wix_insert(rw rec,j,x); -- insertion before j-th component; or at the end if j = OM procedure wix_insert2(rw rec,j,x); -- insertion before component with second cumulant at least j; or at end (j = OM) procedure wix_get_cum(rec); -- get the cum value (string length) for this node procedure wix_get_cum2(rec); -- get the record id string cum value of the final child of this node procedure encode_wd(wd); -- puts word into length-prefixed, self-defining form described above procedure decode_wd(stg); -- recovers word from length-prefixed, self-defining form described above procedure wix_make_from_tuple(t); -- make B-tree representation from tuple (DEBUGGING ONLY) procedure wix_dump(rec); -- get tuple from B-tree representation (DEBUGGING ONLY) procedure num_leaves(a_tree); -- (DEBUGGING ONLY) procedure wix_check_tree_structure(tree); -- recursive check of tree structure (DEBUGGING ONLY) end B_tree_for_wdix; package body B_tree_for_wdix; -- B-trees, realized as objects use setldb,byteutil,disk_records_pak,db_records,string_utility_pak; const zeroes11 := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; const zeroes4 := "\x00\x00\x00\x00"; -- there are two cumulants, the total length of the records and the recid procedure xwix_create(); -- creation routine rec := dr_new_rec(); -- create a new record (at this point, the record is loaded -- dirty, and holds all zeroes; so it is not compound) set_type(rec,wd_index_node_ncr); -- set to non-compound tree return rec; end xwix_create; procedure wix_get_cum(rec); -- get the integer cum value of the final child of this node if int_of_4(rec) = 0 then print("bad rec in wix_get_cum: ",debug_val); stop; end if; if (nc := num_childr(rec)) = 0 then return 0; end if; return get_ch_cum(rec,nc); end wix_get_cum; procedure get_ch_cum(rec,j); -- get the integer cum value for the j-th child of this node the_start := if dr_is_compound(rec) then wix_cum_start else wixnc_cum_start end if; return int_of_5(dr_load(rec)(the_start + (j - 1) * 5..the_start + j * 5 - 1)); end get_ch_cum; procedure set_ch_cum(rec,j,cum_int); -- set the integer cum value for the j-th child of this node the_start := if dr_is_compound(rec) then wix_cum_start else wixnc_cum_start end if; stg := dr_load(rec); -- make sure that this record is loaded stg(the_start + (j - 1) * 5..the_start + j * 5 - 1) := stg_of_5(cum_int); dr_setrecbuf(rec,stg);dr_dirtify(rec); end set_ch_cum; procedure wix_get_cum2(rec); -- get the record id string cum value of the final child of this node if (nc := num_childr(rec)) = 0 then return OM; end if; return get_ch_cum2(rec,nc); end wix_get_cum2; procedure get_ch_cum2(rec,j); -- get the record id string cum value for the j-th child of this node the_start := if dr_is_compound(rec) then wix_cum2_start else wixnc_cum2_start end if; return dr_load(rec)(the_start + (j - 1) * 12..the_start + j * 12 - 1); end get_ch_cum2; procedure set_ch_cum2(rec,j,enc_wd); -- set the record id string cum value for the j-th child of this node the_start := if dr_is_compound(rec) then wix_cum2_start else wixnc_cum2_start end if; stg := dr_load(rec); -- make sure that this record is loaded stg(the_start + (j - 1) * 12..the_start + j * 12 - 1) := enc_wd + (12 - #enc_wd) * "\x00"; dr_setrecbuf(rec,stg);dr_dirtify(rec); end set_ch_cum2; procedure vect_of_children(rec); -- gets vector of children, as a string of 4-byte record numbers if not dr_is_compound(rec) then return ""; end if; nch := num_childr(rec); -- number of children stg:=dr_load(rec); return stg(wix_ch_start..wix_ch_start - 1 + nch * 4); end vect_of_children; procedure set_vect_of_children(rec,stg); -- sets vector of children, from a string of 4-byte record numbers --print("set_vect_of_children: ",hexify(rec)," ",hexify(stg)); if not dr_is_compound(rec) then print("Illegal effort to set vector of children for non_compound node: debug_val = ",debug_val); stop; end if; set_num_childr(rec,(nstg := #stg)/4); -- set the number of children (also loads) missing := wix_cum_start - wix_ch_start - #stg; --if missing < 0 then print("missing,wix_cum_start,wix_ch_start,#stg: ",missing," ",#stg," ",hexify(stg)); stop; end if; stg2:=dr_load(rec); stg2(wix_ch_start..wix_cum_start - 1) := (stg + missing * "\x00"); dr_setrecbuf(rec,stg2);dr_dirtify(rec); end set_vect_of_children; procedure vect_of_cums(rec); -- gets vector of cums, as a string of 5-byte fields the_start := if dr_is_compound(rec) then wix_cum_start else wixnc_cum_start end if; nch := num_childr(rec); -- number of children return dr_load(rec)(the_start..the_start - 1 + nch * 5); end vect_of_cums; procedure set_vect_of_cums(rec,stg); -- sets vector of cumulants, from a string of 5-byte record numbers the_start := if (ic := dr_is_compound(rec)) then wix_cum_start else wixnc_cum_start end if; the_end := if ic then wix_cum2_start else wixnc_cum2_start end if; missing := the_end - the_start - #stg; stg2:=dr_load(rec); -- force load stg2(the_start..the_end - 1) := (stg + missing * "\x00"); dr_setrecbuf(rec,stg2);dr_dirtify(rec); end set_vect_of_cums; procedure vect_of_cums2(rec); -- gets vector of cums, as a string of 4-byte fields the_start := if dr_is_compound(rec) then wix_cum2_start else wixnc_cum2_start end if; nch := num_childr(rec); -- number of children return dr_load(rec)(the_start..the_start - 1 + nch * 12); end vect_of_cums2; procedure set_vect_of_cums2(rec,stg); -- sets vector of second cums, from a string of 12-byte words the_start := if (ic := dr_is_compound(rec)) then wix_cum2_start else wixnc_cum2_start end if; the_end := if ic then wix_cum2_end else wixnc_cum2_end end if; missing := the_end - the_start + 1 - #stg; if missing < 0 then print("Excessively long input stg to set_vect_of_cums2: ",the_start," ",the_end," ",#stg," ",stg); stop; end if; stg2:=dr_load(rec); -- force load stg2(the_start..the_end) := (stg + missing * "\x00"); dr_setrecbuf(rec,stg2);dr_dirtify(rec); end set_vect_of_cums2; procedure num_leaves(a_tree); -- debugging only return if not dr_is_compound(a_tree) then num_childr(a_tree) else 0 +/[num_leaves(voc(a_tree,j)): j in [1..num_childr(a_tree)]] end if; end num_leaves; procedure voc(rec,j); -- n'th member of vector of children stg := dr_load(rec); -- load this string if not dr_is_compound(rec) then print("Illegal effort to access vector of children for non_compound node"); stop; end if; cjstrt := (j - 1) * 4 + wix_ch_start; return stg(cjstrt..cjstrt + 3); -- return child rec end voc; procedure set_voc(rec,j,chrec); -- set n'th member of vector of children cjstrt := (j - 1) * 4 + wix_ch_start; stg:=dr_load(rec); -- force load; then set the first character of the record if not dr_is_compound(rec) then print("Illegal effort to access element of vector of children for non_compound node"); stop; end if; stg(cjstrt..cjstrt + 3) := chrec; dr_setrecbuf(rec,stg);dr_dirtify(rec); end set_voc; procedure wix_make_from_tuple(t); -- make nested representation from tuple --print("make_from_tuple: ",t); if #t <= wixnc_hi_lim then -- put into just one section stg := dr_load(rec := wix_create()); [icums,rids] := convert_tup_sect(t); -- convert to cumulant string form set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids); set_num_childr(rec,#t); -- set the nominal number of children return rec; end if; last_used := 0; -- the last component of t already incorporated into a tuple tx := [ ]; for j in [1,wixnc_hi_lim..#t - 2 * (wixnc_hi_lim - 1)] loop -- put the data into a list of non_compound B_tree nodes, saving enough for at least -- one full final section rec := wix_create(); [icums,rids] := convert_tup_sect(t(j..last_used := j + wixnc_hi_lim - 2)); -- convert one section of input to cumulant string form --print("convert_tup_sect B: ",wixnc_hi_lim," ",#rids); set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids); set_num_childr(rec,wixnc_hi_lim - 1); -- set the nominal number of children tx with:= rec; end loop; unusedo2 := (#t - last_used)/2; t1 := t(last_used + 1..last_used + unusedo2); t2 := t(last_used + unusedo2 + 1..); rec := wix_create(); [icums,rids] := convert_tup_sect(t1); set_num_childr(rec,unusedo2); -- set the nominal number of children set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids); tx with:= rec; rec := wix_create(); [icums,rids] := convert_tup_sect(t2); set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids); tx with:= rec; set_num_childr(rec,#rids/12); -- set the nominal number of children t := tx; -- use the converted t compound_now := false; -- otherwise we must chop into sections; bottom level sections are not compound while (nt := #t) > wix_hi_lim loop -- build tree progressively from bottom, continuing as long as list of nodes obtained is too long sections := [t(j..j + wix_hi_lim - 2): j in [1,wix_hi_lim..nt - wix_hi_lim + 2]]; -- these are lists of nodes if (nt mod (wix_hi_lim - 1)) /= 0 then -- may have to rearrange last 2 pieces if #(piece := t((nrv := #sections) * (wix_hi_lim - 1) + 1..)) >= wix_low_lim then sections with:= piece; else -- otherwise rearrange last 2 pieces ntp := #(twix_pieces := sections(nrv) + piece); sections(nrv) := twix_pieces(1..ntpo2 := ntp/2); sections with:= twix_pieces(ntpo2 + 1..); end if; end if; t := [ ]; -- recalculate the vector of items to work with for sec = sections(j) loop -- convert each section into a tree node sec_node := dr_new_rec(); -- make a tree top for this tuple (i.e. this section) set_type(sec_node,wd_index_node_record); -- set the type of the record sec_stg := "" +/ sec; -- convert the list of children to a string set_vect_of_children(sec_node,sec_stg); -- set the list of children cumulate(sec_node); -- initialize the cumulants t with:= sec_node; -- assemble new vector of nodes --print("sec_node: ",sec_node," ",hexify(dr_load(sec_node))); end loop; compound_now := true; -- after first iteration, sections are not compound end loop; rec := dr_new_rec(); -- create a new, top level record t_stg := "" +/t; -- convert the list of children to a string set_type(rec,wd_index_node_record); -- at this point must be compound set_vect_of_children(rec,t_stg); -- these become the top-level children cumulate(rec); -- initialize the final cumulants return rec; end wix_make_from_tuple; procedure convert_tup_sect(t); -- converts a tuple of pairs [wd,occslen] into a pair of strings cum := 0; -- integer cumulant, developed below wds_stg := "" +/ [encode_wd(wd): [wd,occslen] in t]; len_stg := "" +/ [stg_of_5(cum): [wd,occslen] in t | (cum := cum + occslen) >= 0]; --print("convert_tup_sect: ",#wds_stg," ",wds_stg," ",t," ",[#wd: [wd,-] in t]); return [len_stg,wds_stg]; end convert_tup_sect; procedure wix_dump(rec); -- get tuple from nested representation -- is compound representation or direct, depending on whether 'is_compound' flag is set -- note that this shows the vector of components, without their cumulants -- we ignore the cumulants in the tree nodes var indent := 0; return wix_dump_in(rec); -- call inner workhorse procedure wix_dump_in(rec); -- inner workhorse --print("wix_dump: ",hexify(dr_load(rec))); if int_of_4(rec) = 0 then return ["ZERO_NODE"]; end if; if dr_is_compound(rec) then -- compound case indent +:= 1; nc := num_childr(rec); stg := (["\n" + (indent * " ") + "("] +/ [wix_dump_in(voc(rec,j)): j in [1..nc]]) + ["\n" + (indent * " ")+ "[" + str(nc) + "]" + decode_wd(wix_get_cum2(rec)) + ":" + str(wix_get_cum(rec)) + ")"]; indent -:= 1; return stg; end if; -- done with compound case if (ncr := num_childr(rec)) = 0 then return ["(,:0)"]; end if; t := []; -- otherwise we must analyze the non-compound case prev := 0; for j in [1..ncr] loop int := (new := get_ch_cum(rec,j)) - prev; prev := new; decwd := decode_wd(get_ch_cum2(rec,j)); t with := decwd + ":" + str(int); end loop; return ["\n" + (indent * " ") + "("] + t + [decode_wd(wix_get_cum2(rec)) + ":" + str(wix_get_cum(rec)) + ")"]; end wix_dump_in; end wix_dump; procedure encode_wd(wd); -- puts word into length-prefixed, self-defining form described above --print("encode_wd: ",#(char(nc := #wd) + wd + zeroes11(nc + 1..))," ",#zeroes11," ",#wd); if (nc := #wd) < 128 then return char(nc) + wd + -- encoding of string up to 128 bytes in length if nc <= 11 then zeroes11(nc + 1..) elseif (ncmod := (nc + 1) mod 4) = 0 then "" else zeroes4(ncmod + 1..) end if; end if; len_chars := char(nc/256 + 128) + char(nc mod 256); return len_chars + wd + if (ncmod := (nc + 2) mod 4) = 0 then "" else zeroes4(ncmod + 1..) end if; end encode_wd; procedure decode_wd(stg); -- recovers word from length-prefixed, self-defining form described above -- the case of strings of more than 11 but no more than 128 bytes is represented twice in this -- preliminary version to allow the 'rest of the string' to be fetched if necessary if (wdlen := abs(stg(1))) <= 11 then return stg(2..wdlen + 1); elseif wdlen < 128 then return stg(2..wdlen + 1); -- but first get the rest of the string else wdlen := 256 * (wdlen - 128) + abs(stg(2)); return stg(3..wdlen + 2); -- but first get the rest of the string end if; end decode_wd; procedure int_of_12(stg); -- convert a 12-byte string to an integer (for comparisons only) sum := 0; ns := #stg; for j in [1..12] loop sum := 256 * sum + if j > ns then 0 else abs(stg(j)) end if; end loop; return sum; end int_of_12; procedure wix_comp(rec,j); -- fetch of component containing cumulant j return if (ccix := comp_cum_ix(rec,1,j)) = OM then OM else ccix(1..2) end if; end wix_comp; procedure wix_comp2(rec,j); -- fetch of component containing second cumulant j return if (ccix := comp_cum_ix(rec,2,j)) = OM then OM else ccix(1..2) end if; end wix_comp2; procedure wix_comp_cum(rec,x); -- fetch of x-th component, with cumulant return comp_cum_ix(rec,1,x); end wix_comp_cum; procedure wix_comp_cum2(rec,x); -- fetch of component containing second cumulant x, with cumulant return comp_cum_ix(rec,2,if x = OM then OM else encode_wd(x) end if); end wix_comp_cum2; procedure comp_cum_ix(rec,srch_on,x); -- fetch of x-th component by search on specified cumulant -- returns value in the form [rid,len,cum_len] -- component should be found by binary search -- search for the first index component with cumulant past -- the specified x; Return OM if there is none such. -- NOTE: this should be by binary search -- find the first node for which a cumulant >= j if x = OM then -- want last node j := num_childr(rec); if srch_on = 1 then -- Note: only needed in non-compound case the_cum := get_ch_cum(rec,j); pass("wix_last1"); else pass("wix_last2"); the_cum := get_ch_cum2(rec,j); end if; else if srch_on = 2 then iofx := int_of_12(x); pass("wix_nlast2"); else pass("wix_nlast1"); end if; if not (exists j in [1..num_childr(rec)] | if srch_on = 1 then (the_cum := get_ch_cum(rec,j)) >= x else int_of_12(the_cum := get_ch_cum2(rec,j)) >= iofx end if) then --print("******** return OM: ",x," ",nc := num_childr(rec)," ",wix_dump(rec)," ",if nc > 0 then get_ch_cum(rec,nc) else "NONE" end if); return OM; -- desired node not found end if; end if; prev_cum := if j = 1 then 0 else get_ch_cum(rec,j - 1) end if; if not dr_is_compound(rec) then -- if node is not compound we have what we want if srch_on = 1 then -- the integer cum was calculated above pass("wix_nc1"); return [decode_wd(get_ch_cum2(rec,j)),the_cum - prev_cum,the_cum]; else -- the rec id was calculated above int_cum := get_ch_cum(rec,j); pass("wix_nc2"); return [decode_wd(the_cum),int_cum - prev_cum,int_cum]; end if; end if; -- otherwise we deal with the compound case res := comp_cum_ix(voc(rec,j),srch_on,if x = OM or srch_on = 2 then x else x - prev_cum end if); -- continue the search recursively [wd,noccs,lencum] := res; -- decode the result returned recursively return [wd] + [noccs,prev_cum + lencum]; end comp_cum_ix; procedure wix_set_comp(rw rec,w,x);-- assignment of the first component whose cumulant is at least w if x /= OM then x := [encode_wd(x(1)),x(2)]; end if; set_comp_ix(rec,1,w,x); end wix_set_comp; procedure wix_set_comp2(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w if x /= OM then x := [encode_wd(x(1)),x(2)]; end if; if w = OM then set_comp_ix(rec,1,w,x); else set_comp_ix(rec,2,encode_wd(w),x); end if; end wix_set_comp2; procedure set_comp_ix(rw rec,srch_on,w,x); -- assignment of the first component whose specified cumulant is at least w; w must be in range -- we must first copy rec if its refcount is greater than 1, and transfer one reference -- from its old to its copied version. The leaves x are supplied as pairs [encoded_stg,number_of_occs] if refcount(int_of_4(rec)) > 1 then -- must copy pass("wix_set_copy"); stg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r); increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original end if; ic := dr_is_compound(rec); if srch_on = 2 and w /= OM then wint := int_of_12(w); end if; ncr := num_childr(rec); if w = OM then -- make change in last node srch_on := 1; w := get_ch_cum(rec,ix := ncr); -- use search on first componet to find last child if ix = 0 then print("Change at end of empty tree is not allowed."); stop; end if; elseif not (exists ix in [1..ncr] | if srch_on = 1 then get_ch_cum(rec,ix) >= w else int_of_12(get_ch_cum2(rec,ix)) >= wint end if) then print("WIX - Search index " + str(w) + " out of range in assignment to cumulating vector " + hexify(rec) + " " + str(ncr) + " " + str(ic) + " " + hexify(dr_load(rec))); stop; end if; -- get the local cumulant of the child if ic then old_cum := wix_get_cum(voc(rec,ix)); pass("wix_nc1"); else pass("wix_comp1"); old_cum := get_ch_cum(rec,ix) - if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if; end if; prev_cum := if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if; if x /= OM then -- we are not dealing with a deletion if not ic then -- non-compound case; we must add the change in the leaf cumulant -- to the cumulant of all following nodes. cum_change := x(2) - old_cum; update_cums(rec,ix,cum_change); -- update the cums,starting with the given child set_ch_cum2(rec,ix,x(1)); -- modify the cum of the changed component. Note that the following -- second cumulants need not be changed. However, if ix references the -- final child of rec, a cum2 entry in the parent of rec may need to be changed below. pass("wix_ndel_nc"); return; -- done with this case end if; -- otherwise we have the compound case pass("wix_ndel_comp"); the_child := voc(rec,ix); -- the next operation may copy the child set_comp_ix(the_child,srch_on,if srch_on = 1 then w - prev_cum else w end if,x); -- make the change in the child cum_change := wix_get_cum(the_child) - old_cum; set_voc(rec,ix,the_child); -- put the revised, possibly copied child back into position -- the 'update_cums' procedure which we call now must then start with the (properly set) -- cumulant of the preceding node, and then left_add the change in the the cumulant -- to the cumulant of this child to all the subsequent children. This assumes that -- (new_d - old_d) + c + old_d + e = c + new_d + e -- for all cumulant values. This is obviously true for values using associative-commutative -- cumulator functions with an inverse, and also in the (string) case where a + b = b. update_cums(rec,ix,cum_change); -- update the cums,starting with the given child if ix = ncr then set_ch_cum2(rec,ix,wix_get_cum2(the_child)); pass("wix_ndel_comp_last"); else pass("wix_ndel_comp_nlast"); end if; -- the final cumulant of the_child might have been changed by -- the preceding set_comp_ix(the_child,..) operation; see preceding comment. --print("compound self,ix: ",wix_get_cum()," ",ix); return; -- done with the non-deletion case end if; -- otherwise we are dealing with a deletion if not dr_is_compound(rec) then -- non-compound case pass("wix_del_nc"); if num_childr(rec) > 0 then -- need not delete cumulant if no remaining children pass("wix_del_nc_have"); stg := vect_of_cums(rec); -- get the vector of cums stg((ix - 1) * 5 + 1..ix * 5) := ""; -- drop one element set_vect_of_cums(rec,stg); -- put back into place stg := vect_of_cums2(rec); -- get the second vector of cums stg((ix - 1) * 12 + 1..ix * 12) := ""; -- drop one element set_vect_of_cums2(rec,stg); -- put back into place end if; -- delete the child from the string of descendants set_num_childr(rec,num_childr(rec) - 1); -- count down the number of children -- delete the cumulant of the ix-th node. note that this has already been loaded cum_change := -old_cum; update_cums(rec,ix,cum_change); -- update the cums,starting with the appropriate child return; -- done with this case end if; -- otherwise we are dealing with a deletion in a compound case the_child := voc(rec,ix); -- get the child old_ch_leafsum := wix_get_cum(old_child := the_child); set_comp_ix(the_child,srch_on,if srch_on = 1 then w - prev_cum else w end if,OM); -- make the deletion in the child new_ch_leafsum := wix_get_cum(the_child); set_voc(rec,ix,the_child := the_child); -- re-insert the possibly modified child cum_change := new_ch_leafsum - old_ch_leafsum; update_cums(rec,ix,cum_change); -- update the cums,starting with the given child -- since the last child may have been deleted, we also need to update the second cum pass("wix_del_comp"); if ix = ncr then pass("wix_del_comp_last"); set_ch_cum2(rec,ix,wix_get_cum2(the_child)); else pass("wix_del_comp_nlast"); end if; -- now it is possible that the child has lost enough children to have fallen below the required wix_low_lim -- if this has happened, we attempt to share or join children with one of the adjacent siblings if num_childr(the_child) >= wix_low_lim then -- otherwise try to join or share pass("wix_del_comp_enough"); return; end if; if pull_from_left(rec,ix) or pull_from_right(rec,ix) then pass("wix_del_comp_canpull"); return; end if; if join_with_left(rec,ix) or join_with_right(rec,ix) then pass("wix_del_comp_canjoin"); null; end if; -- In the 'join' case, either the left or the right join must work, -- since in this compound case we must have at least one sibling. -- but we must check to see if the node being processed has fallen to -- just one child, and if it has, replace it by its single child. if num_childr(rec) > 1 then pass("wix_del_comp_npop"); return; end if; pass("wix_del_pop"); ch_rec := dr_load(the_ch := voc(rec,1)); dr_load(rec); dr_setrecbuf(rec,ch_rec); dr_dirtify(rec); if dr_is_compound(the_ch) then set_vect_of_children(the_ch,""); end if; -- the children have all moved incref(the_ch,-1); -- child data is inherited from single child, which loses a reference end set_comp_ix; procedure update_cums(rec,ix,cum_change); -- update the cums of this tree,starting with the given child -- add cum_change to all subsequent children for j in [ix..nvc := num_childr(rec)] loop set_ch_cum(rec,j,cum_change + get_ch_cum(rec,j)); end loop; end update_cums; procedure cumulate(rec); -- initalize the cumulants of a node whose children are either leaves or already initialized if not dr_is_compound(rec) then return; end if; -- non-compound nodes are already cumulated the_cum := 0; for j in [1..num_childr(rec)] loop nd := voc(rec,j); -- get the j-th child nd_cum := wix_get_cum(nd); -- cumulant of the final child of the subnode, or occurence string length the_cum := the_cum + nd_cum; set_ch_cum(rec,j,the_cum); -- update the child's cumulant value nd_cum2 := wix_get_cum2(nd); -- cumulant2 of the final child of the subnode, or last element of occurence string set_ch_cum2(rec,j,nd_cum2); -- update the child's cumulant value end loop; end cumulate; procedure wix_insert(rw rec,j,x); -- insertion before j-th component; or at the end if j = OM insert(rec,1,j,x); end wix_insert; procedure wix_insert2(rw rec,j,x); -- insertion before component with second cumulant at least j; insert(rec,2,j,x); -- or at the end if j = OM end wix_insert2; procedure insert(rw rec,srch_on,j,x); -- insertion before j-th component with at least specified cumulant; tree_level := 0; -- note that we are at the top of the tree x := [encode_wd(x(1)),x(2)]; if srch_on = 2 and j /= OM then j := encode_wd(j); end if; insert_in(rec,srch_on,j,x); -- call the inner routine end insert; procedure insert_in(rw rec,srch_on,j,x); -- inner recursion of insertion before j-th component -- with at least specified cumulant; or at the end if j = OM -- component should be found by binary search --print("insert_in: ",str(wix_dump(rec))); if refcount(int_of_4(rec)) > 1 then -- must copy pass("wix_in_copy"); stg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r); increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original end if; result := OM; -- in case desired element not found ic := dr_is_compound(rec); ncr := num_childr(rec); the_cum := wix_get_cum(rec); -- get final cumulant of this tree [x_cum2,x_cum] := x; -- cumulant values of the leaf x if j /= OM then -- look for target node of insertion, if any if srch_on = 2 then iofj := int_of_12(j); pass("wix_in_nend2"); else pass("wix_in_nend1"); end if; if exists ix in [1..ncr] | if srch_on = 1 then (cum := get_ch_cum(rec,ix)) >= j else int_of_12(cum := get_ch_cum2(rec,ix)) >= iofj end if then result := [ix,if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if,cum]; if ic then nd := voc(rec,ix); end if; pass("wix_in_nend3"); else pass("wix_in_end"); result := OM; end if; end if; if result = OM then -- we have insertion at the very end if not ic then -- simply append to vector pass("wix_in_end_nc"); set_num_childr(rec,nvc := ncr + 1); -- add the cumulant of x to the present cumulant of this tree set_ch_cum(rec,nvc,the_cum := the_cum + x_cum); set_ch_cum2(rec,nvc,x_cum2); --print("num_childr(rec): ",nvc," ",num_childr(rec)," ",ncr," ",wix_hi_lim + 1); if tree_level > 0 or nvc <= wixnc_hi_lim then -- no need to split pass("wix_in_end_nc_nos"); return; end if; -- otherwise we must split, and becomes compound pass("wix_in_end_nc_split"); -- note that in this case we are at the very top of the tree th := two_halves(rec); -- get the two halves set_is_compound(rec,true); -- note that it is indeed compound set_vect_of_children(rec,th); -- split into 2 non-compound subtrees set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum); set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2))); return; -- done with this case end if; -- otherwise we have insertion at the very end of a compound vector pass("wix_in_end_comp"); last_child:= voc(rec,nvc := num_childr(rec)); -- get the last child tree_level +:= 1; -- down a level insert_in(last_child,srch_on,OM,x); -- insert at the end of this last child tree_level -:= 1; -- back up a level set_voc(rec,ncr,last_child); -- insert the possibly modified child back into vect_of_children -- add the cumulant of x to the present cumulant of this tree set_ch_cum(rec,ncr,the_cum := the_cum + x_cum); set_ch_cum2(rec,ncr,x_cum2); if (if dr_is_compound(last_child) then wix_hi_lim else wixnc_hi_lim end if) >= num_childr(last_child) then pass("wix_in_end_comp_nos"); return; -- no need to split the child end if; -- otherwise we must split the last child pass("wix_in_end_comp_split"); split_node(rec,nvc); -- split the nvc-th node into two. we insert an empty node to the right -- of node nvc, and then move half the children of the nvc-th node into the new node if tree_level > 0 or (nvc := num_childr(rec)) <= wix_hi_lim then -- no need to split this node pass("wix_in_end_comp_nosthis"); return; -- done with this case end if; -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level pass("wix_in_end_comp_splitthis"); --print("set_vect_of_children: insertion A: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if); set_vect_of_children(rec,two_halves(rec)); set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum); set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2))); return; -- done with this case end if; -- end of case of insertion at the very end -- in the remaining cases we have an insertion before one of our nodes [ix,prev_cum,cum] := result; -- decode the result returned, geting the insertion result and the preceding cumulant if not ic then -- insertion at appropriate position in non-compound vector pass("wix_in_nend_nc"); the_cums := vect_of_cums(rec); the_cums2 := vect_of_cums2(rec); the_cums(5 * ix - 4..5 * ix - 5) := if ix = 1 then "\x00\x00\x00\x00\x00" else the_cums(5 * ix - 9..5 * ix - 5) end if; the_cums2(12 * ix - 11..12 * ix - 12) := x_cum2 + (12 - #x_cum2) * "\x00"; --print("#the_cums: ",#the_cums); if #the_cums > 70 then stop; end if; set_vect_of_cums(rec,the_cums); -- make insertion into list of cums set_vect_of_cums2(rec,the_cums2); -- make insertion into list of cums set_num_childr(rec,nvc := ncr + 1); -- there is one more child update_cums(rec,ix,x_cum); -- adjust the given and following cumulants --print("inserted at position: ",ix," ",x," ",hexify(dr_load(rec))); if tree_level > 0 or nvc <= wixnc_hi_lim then -- no need to split pass("wix_in_nend_nc_nos"); return; -- done with this case end if; -- otherwise we must split, and becomes compound pass("wix_in_nend_nc_split"); -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level th := two_halves(rec); -- get the two halves set_is_compound(rec,true); -- note that it is indeed compound --print("set_vect_of_children: insertion B: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if); set_vect_of_children(rec,th); -- split into 2 non-compound subtrees set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum); set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2))); --print("just became compound: ",dr_is_compound(rec)," ",wix_dump(rec)); stop; return; -- done with this case end if; -- otherwise we deal with the compound case pass("wix_in_nend_comp"); c := voc(rec,ix); -- get the child into which the insertion will now be made -- and get the position in this child at which the insertion will be made tree_level +:= 1; -- down a level insert_in(c,srch_on,if srch_on = 1 then j - prev_cum else j end if,x); -- insert x into this child tree_level -:= 1; -- back up a level set_voc(rec,ix,c); -- put the possibly revised child back into position update_cums(rec,ix,x_cum); -- adjust the given and following cumulants set_ch_cum2(rec,ix,wix_get_cum2(c)); -- adjust the second cumulant on record for the child -- which may have changed if the insertion was at the end of the child if (if dr_is_compound(c) then wix_hi_lim else wixnc_hi_lim end if) >= num_childr(c) then pass("wix_in_nend_comp_nos"); return; -- no need to split the child end if; pass("wix_in_nend_comp_split"); -- otherwise we must split the child split_node(rec,ix); -- split the child into two. we insert an empty node to the right -- of node ix, and then move half the nodes into it if tree_level > 0 or (nvc := num_childr(rec)) <= wix_hi_lim then -- no need to split this node pass("wix_in_nend_comp_nosthis"); return; -- done with this case end if; -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level --print("set_vect_of_children: insertion C: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if); pass("wix_in_nend_comp_splitthis"); set_vect_of_children(rec,two_halves(rec)); set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum); set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2))); end insert_in; procedure two_halves(rec); -- split this tree into two halves -- the two nodes created share the children of the original tree, so that the refcounts of -- these children need no adjustment. The nodes created each have a refcount of 1. -- Note that this routine is only called if rec has no parent. the_type := if (ic := dr_is_compound(rec)) then wd_index_node_record else wd_index_node_ncr end if; u1 := dr_new_rec(); u2 := dr_new_rec(); -- make and initialize two subtrees set_type(u1,the_type); set_type(u2,the_type); -- the halves are compound iff this tree is compound -- we must subtract the cumulant of the last retained child from all the children that move -- to get the cumulant of the left node hnvc := (nvc := num_childr(rec))/2; cum_last_retained := get_ch_cum(rec,hnvc); for j in [hnvc + 1..nvc] loop -- subtract this from the cumulant of each child that will move set_ch_cum(rec,j,get_ch_cum(rec,j) - cum_last_retained); end loop; vocums := vect_of_cums(rec); -- get the vector of cums vocums2 := vect_of_cums2(rec); -- get the vector of cums if ic then pass("wix_halves_nc"); set_vect_of_children(u1,(voch := vect_of_children(rec))(1..4 * hnvc)); else pass("wix_halves_comp"); set_num_childr(u1,hnvc); -- set the number of children of u1 end if; set_vect_of_cums(u1,vocums(1..5 * hnvc)); -- u1 gets half the children and cums set_vect_of_cums2(u1,vocums2(1..12 * hnvc)); -- u1 gets half the children and cums set_vect_of_cums(u2,vocums(5 * hnvc + 1..)); -- the second half inherits adjusted cumulants from the original tree set_vect_of_cums2(u2,vocums2(12 * hnvc + 1..)); -- likewise for the second cums set_num_childr(u1,hnvc); -- set the number of children of u1 if ic then set_vect_of_children(u2,voch(4 * hnvc + 1..)); -- u2 gets the other half of the children else set_num_childr(u2,nvc - hnvc); -- set the number of children of u2 end if; --print("two_halves: ",str(wix_dump(u1)),"\n",str(wix_dump(u2)),"\n",hexify(voch(4 * hnvc + 1..)),"\n",hexify(dr_load(u2))); return u1 + u2; -- assemble the two nodes of the new compound tree; return as a string end two_halves; procedure hpull_from_left(rec,k); -- split children with left sibling if k = 1 or num_childr(voc(rec,k - 1)) <= wix_low_lim then return false; end if; pass("wix_pull_left"); share_right(rec,k - 1); return true; end hpull_from_left; procedure hpull_from_right(rec,k); -- split children with right sibling if k >= (nrv := num_childr(rec)) or num_childr(voc(rec,k + 1)) <= wix_low_lim then return false; end if; pass("wix_pull_right"); share_right(rec,k); return true; end hpull_from_right; procedure hshare_right(rec,k); -- share children with right-hand sibling -- we divide the children of the k-th node, together with those of the k+1'st, -- into two roughly equal groups, and make these the k-th and k+1'st nodes. The -- cumulative totals must be adjusted in the k-th node, in the -- children moved between nodes, and in the children of the k+1'st node -- this routine must first copy the nodes among which children will move, if they have more than 1 reference. -- but it does not change the number of references to the children, so that their refcounts need no adjustment --print("share_right: ",k," ",str(wix_dump(rec))); if (ic := dr_is_compound(ndk := voc(rec,k))) then -- a compound node is involved in the sharing pass("wix_share_nc"); nchkp1 := #(rkp1 := vect_of_children(ndkp1 := voc(rec,k + 1)))/4; nchk := #(rk := vect_of_children(ndk))/4; -- get the two groups of children, and their lengths else pass("wix_share_comp"); nchkp1 := num_childr(ndkp1 := voc(rec,k + 1)); nchk := num_childr(ndk); end if; numleft := (nchk + nchkp1)/2; -- half the children; the left-hand will get this number of children if refcount(int_of_4(ndkp1)) > 1 then -- must copy pass("wix_share_copy"); stg := dr_load(ndkp1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r); incref(ndkp1,-1); ndkp1 := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k + 1,new_r); end if; if refcount(int_of_4(ndk)) > 1 then -- must copy pass("wix_share_copy2"); stg := dr_load(ndk); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r); incref(ndk,-1); ndk := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k,new_r); end if; if numleft > nchk then -- children will move left pass("wix_share_move_left"); num_mov := numleft - nchk; -- the number that will move left --print("move left: ",num_mov," ",hexify(dr_load(ndk)),"\n",hexify(dr_load(ndkp1))); -- we must subtract the cumulant of the last child moving left -- from the cumulants of all the right-hand children which do not move, -- and must add this to the cumulant of the k-th node. We must also -- add the cumulant of the last left-hand child of ndk to the cumulants of all -- the right-hand children which do move. ndk_cum := get_ch_cum(rec,k); -- get the cumulant of ndk right_cum := wix_get_cum(ndk); -- cumulant of the last child of ndk moved_cum := get_ch_cum(ndkp1,num_mov); -- cumulant of the last child of ndkp1 that moves set_ch_cum(rec,k,moved_cum + ndk_cum); -- add moved_cum to the cumulant of the k-th node set_ch_cum2(rec,k,get_ch_cum2(ndkp1,num_mov)); -- correct the second cumulant of the k-th node for j in [1..num_mov] loop set_ch_cum(ndkp1,j,right_cum + get_ch_cum(ndkp1,j)); end loop; for j in [num_mov + 1..nchkp1] loop set_ch_cum(ndkp1,j,get_ch_cum(ndkp1,j) - moved_cum); end loop; -- and now we must move the corresponding cums cumvp1 := vect_of_cums(ndkp1); -- second vector of cums --print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop; set_vect_of_cums(ndk,vect_of_cums(ndk) + cumvp1(1..num_mov * 5)); -- move the cums in set_vect_of_cums(ndkp1,cumvp1(num_mov * 5 + 1..)); -- move the cums out -- since the number of cums of ndkp1 is defined by its number of children, we need not edit that list cumvp1 := vect_of_cums2(ndkp1); -- second vector of second cums --print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop; set_vect_of_cums2(ndk,vect_of_cums2(ndk) + cumvp1(1..num_mov * 12)); -- move the cums in set_vect_of_cums2(ndkp1,cumvp1(num_mov * 12 + 1..)); -- move the cums out if ic then -- if the nodes are compound we must move children in addition to cumulants set_vect_of_children(ndk,rk + rkp1(1..4 * num_mov)); -- now actually move the children set_vect_of_children(ndkp1,rkp1(4 * num_mov + 1..)); else -- not compound; must adjust the number of children set_num_childr(ndk,nchk + num_mov); set_num_childr(ndkp1,nchkp1 - num_mov); end if; --print("move the children: ",rk," ",rkp1," ",num_mov); else -- children will move right pass("wix_share_move_right"); -- we must subtract the cumulant of the last remaining child -- from that of each of the children moving right. The remaining cumulant of -- the last child moving right must then be added to the cumulants of -- all the original children of the (k + 1)-st node --print("numleft: ",numleft); rem_left_cum := get_ch_cum(ndk,numleft); -- cumulant of the last remaining child of ndk for j in [numleft + 1..nchk] loop -- subtract this from the cum of all the nodes moving right set_ch_cum(ndk,j,get_ch_cum(ndk,j) - rem_left_cum); end loop; total_moved_cum := get_ch_cum(ndk,nchk); -- get the cumulant of the last node moving right for j in [1..nchkp1] loop -- add this to the cum of all the children of the (k + 1)-st node set_ch_cum(ndkp1,j,total_moved_cum + get_ch_cum(ndkp1,j)); end loop; set_ch_cum(rec,k,get_ch_cum(rec,k) - total_moved_cum); -- the cumulant of the last node moving right must be subtracted from the cumulant of the k-th node set_ch_cum2(rec,k,get_ch_cum2(ndk,numleft)); -- update the second cum of the moved node -- and now we must move the corresponding cums cumv := vect_of_cums(ndk); -- first vector of cums cumvp1 := vect_of_cums(ndkp1); -- second vector of cums set_vect_of_cums(ndkp1,cumv(5 * numleft + 1..5 * nchk) + vect_of_cums(ndkp1)); -- move the cums -- since the number of cums of ndk is defined by its number of children, we need not edit that list cumv := vect_of_cums2(ndk); -- first vector of cums cumvp1 := vect_of_cums2(ndkp1); -- second vector of cums set_vect_of_cums2(ndkp1,cumv(12 * numleft + 1..12 * nchk) + vect_of_cums2(ndkp1)); -- move the cums if ic then -- if the nodes are compound we must move children in addition to cumulants set_vect_of_children(ndk,rk(1..4 * numleft)); -- now actually move the children set_vect_of_children(voc(rec,k + 1),rk(4 * numleft + 1..) + rkp1); else -- not compound; must adjust the number of children set_num_childr(ndk,numleft); set_num_childr(ndkp1,nchk + nchkp1 - numleft); --print("numleft: ",numleft," ",num_childr(voc(rec,k))," ",num_childr(ndk)); end if; end if; end hshare_right; procedure hjoin_with_left(rec,k); -- join to left sibling -- this routine must copy the left sibling if it has a refcount > 1; and must reduce the -- refcount of the right sibling --print("join_with_left: ",k," ",wix_dump(rec)); if k = 1 then return false; end if; ndkm1 := voc(rec,k - 1); -- get the left sibling node if (ic := dr_is_compound(ndk := voc(rec,k))) then -- children are compound nodes pass("wix_join_left_comp"); nchk := #(rk := vect_of_children(ndk))/4; nchkm1 := #(rkm1 := vect_of_children(ndkm1))/4; --print("join_with_left: ",nchkm1," ",ic); else -- children are non-compound nodes pass("wix_join_left_nc"); nchk := num_childr(ndk); -- get the nominal number of children nchkm1 := num_childr(ndkm1); -- get the nominal number of children end if; if refcount(int_of_4(ndkm1)) > 1 then -- must copy pass("wix_join_left_copy"); stg := dr_load(ndkm1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r); incref(ndkm1,-1); ndkm1 := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k + 1,new_r); end if; -- the cumulant of the left sibling simply becomes that of the right sibling set_ch_cum(rec,k - 1,get_ch_cum(rec,k)); set_ch_cum2(rec,k - 1,get_ch_cum2(rec,k)); last_left_cums := wix_get_cum(ndkm1); -- the cumulant of the last child of the left sibling -- must be added to that of every child of the right sibling for j in [1..nchk] loop set_ch_cum(ndk,j,last_left_cums + get_ch_cum(ndk,j)); end loop; set_vect_of_cums(ndkm1,vect_of_cums(ndkm1) + vect_of_cums(ndk)); -- the left sibling gets all the cumulants set_vect_of_cums2(ndkm1,vect_of_cums2(ndkm1) + vect_of_cums2(ndk)); -- the left sibling gets all the cumulants if ic then set_vect_of_children(ndkm1,rkm1 + rk); -- the left sibling gets all the children else set_num_childr(ndkm1,nchk + nchkm1); -- the left sibling gets the full number of children end if; -- now delete the k-th cumulant the_cums := vect_of_cums(rec); -- get the cums of the current node the_cums(5 * k - 4..5 * k) := ""; set_vect_of_cums(rec,the_cums); -- delete the k-th cumulant the_cums := vect_of_cums2(rec); -- get the second cums of the current node the_cums(12 * k - 11..12 * k) := ""; set_vect_of_cums2(rec,the_cums); -- delete the k-th second cumulant the_ch := vect_of_children(rec); -- get the children of the current node the_ch(4 * k - 3..4 * k) := ""; set_vect_of_children(rec,the_ch); -- delete the k-th node if ic then set_vect_of_children(ndk,""); end if; -- the children of ndk have been transfered incref(ndk,-1); -- drop the number of references to ndk; possibly erasing it --print("done join_with_left: ",hexify(dr_load(ndkm1))); return true; end hjoin_with_left; procedure hjoin_with_right(rec,k); -- join to right sibling -- this routine must copy the right sibling if it has a refcount > 1; and must reduce the -- refcount of the left sibling --print("join_with_right: ",k," ",str(wix_dump(rec))); if k >= (nrv := num_childr(rec)) then return false; end if; ndkp1 := voc(rec,k + 1); -- get node to the right if (ic := dr_is_compound(ndk := voc(rec,k))) then -- children are compound nodes nchk := #(rk := vect_of_children(ndk))/4; rkp1 := vect_of_children(ndkp1); pass("wix_join_right_comp"); else -- children are non-compound nodes pass("wix_join_right_nc"); nchk := num_childr(ndk); -- get the nominal number of children end if; if refcount(int_of_4(ndkp1)) > 1 then -- must copy pass("wix_join_right_copy"); stg := dr_load(ndkp1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r); incref(ndkp1,-1); ndkp1 := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k + 1,new_r); end if; last_left_cums := wix_get_cum(ndk); -- the cumulant of the last child of the left sibling -- must be added to that of every child of the right sibling for j in [1..nchkp1 := num_childr(ndkp1)] loop set_ch_cum(ndkp1,j,last_left_cums + get_ch_cum(ndkp1,j)); end loop; set_vect_of_cums(ndkp1,vect_of_cums(ndk) + vect_of_cums(ndkp1)); -- the right node gets all the cumulants set_vect_of_cums2(ndkp1,vect_of_cums2(ndk) + vect_of_cums2(ndkp1)); -- the right node gets all the cumulants if ic then -- must ransfer not only cumulants, but also children set_vect_of_children(ndkp1,rk + rkp1); -- the right node gets all the children else set_num_childr(ndkp1,nchk + nchkp1); -- the left sibling gets the full number of children end if; -- now delete the k-th cumulant the_cums := vect_of_cums(rec); -- get the cums of the current node the_cums(5 * k - 4..5 * k) := ""; set_vect_of_cums(rec,the_cums); -- delete the k-th cumulant the_cums := vect_of_cums2(rec); -- get the second cums of the current node the_cums(12 * k - 11..12 * k) := ""; set_vect_of_cums2(rec,the_cums); -- delete the k-th second cumulant --print("join_with_right: ",nchk," ",last_left_cums," ",hexify(the_cums)); the_ch := vect_of_children(rec); -- get the children of the current node the_ch(4 * k - 3..4 * k) := ""; set_vect_of_children(rec,the_ch); -- delete the k-th node --print("done join_with_right: ",hexify(dr_load(ndkp1))); if ic then set_vect_of_children(ndk,""); end if; -- the children of ndk have been transfered incref(ndk,-1); -- drop the number of references to ndk; possibly erasing it return true; end hjoin_with_right; procedure hsplit_node(rec,k); -- split the k-th node into two -- we insert an empty node to the right of node k, and then share the children of node k with this empty node -- the empty node inserted stars wwith refcount = 1 --print("split_node entry: ",str(wix_dump(rec))," ",k); voch := vect_of_children(rec); vocums := vect_of_cums(rec); vocums2 := vect_of_cums2(rec); vocums(5 * k + 1..5 * k) := vocums(5 * k - 4..5 * k); -- duplicate the prior cums vocums2(12 * k + 1..12 * k) := vocums2(12 * k - 11..12 * k); --print("split_node #vocums: ",#vocums); if #vocums > 45 then print(hexify(vocums)); stop; end if; set_vect_of_cums(rec,vocums); -- and insert the revised cums into the record set_vect_of_cums2(rec,vocums2); -- duplicate the prior second cum and insert it into the record -- insert new node, whose number of children is automatically 0 u2 := dr_new_rec(); set_type(u2,if dr_is_compound(voc(rec,1)) then wd_index_node_record else wd_index_node_ncr end if); voch(4 * k + 1..4 * k) := u2; -- put in the new child --print("set_vect_of_children: split_node: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if); set_vect_of_children(rec,voch); -- and insert the revised children into the record share_right(rec,k); -- share the children of node k with this empty node --print("split_node exit: ",str(wix_dump(rec))," ",num_childr(rec)," ",str(wix_dump(voc(rec,k)))," ",num_childr(voc(rec,k))); end hsplit_node; procedure wix_check_tree_structure(tree); -- recursive check of tree structure var level_now := 0,set_of_levels := { }; var smallest_branching := 10000; check_tree_structure_in(tree); -- call inner recursive workhorse if #set_of_levels > 1 then print("TREE STRUCTURE INCONSISTENCY, LEVELS ARE: ",set_of_levels," ",str(wix_dump(tree))); return false; end if; if smallest_branching < wix_low_lim then print("TREE STRUCTURE INCONSISTENCY, BRANCHING LEVEL DROPS TO: ",smallest_branching); return false; end if; return true; -- otherwise OK procedure check_tree_structure_in(tree); -- inner recursive workhorse level_now +:= 1; nc := num_childr(tree); if level_now > 1 then smallest_branching := smallest_branching min nc; end if; if dr_is_compound(tree) then for j in [1..nc] loop check_tree_structure_in(voc(tree,j)); end loop; else -- at leaf, so collect level of leaf set_of_levels with:= level_now; end if; level_now -:= 1; -- restor te prior level end check_tree_structure_in; end wix_check_tree_structure; end B_tree_for_wdix; program test; -- tests of B-tree operations for word index trees use setldb,byteutil,B_tree_for_wdix,db_records,string_utility_pak,disk_records_pak; var tree,small_tree; -- tree to test var simple_string_tup; -- for generating words for testing simple_string_tup := breakup("0123456789.abcdefghij.klmnopqrst.ABCDEFGHIJ.KLMNOPQRST","."); --print(str([#encode_wd(j * "aaa"): j in [0..50]])); -- should always be multiple of 4 --print(str([decode_wd(encode_wd(j * "Aab")) = j * "Aab": j in [0..50]])); -- should recover --print(str([decode_wd(encode_wd(j * "Aab")): j in [49..50]])); -- should recover code_pts := wix_code_pts; -- code points to be traversed --points not passed: {"wix_in_copy", "wix_join_left_copy", "wix_share_copy2", "wix_in_nend2", "wix_nlast2", --"wix_last2"} str_len_cum_tests; -- tests of B-tree operations for word index trees report_points_passed(); -- report on code points not traversed procedure newt(j); return [("Z" + simple_string_tup((j/10) mod 5 + 1)(1..(j mod 10) + 1)),1]; end newt; -- leaf creator procedure newtbg(j); return [("U" + simple_string_tup((j/10) mod 5 + 1)(1..(j mod 10) + 1)),1]; end newtbg; -- leaf creator procedure raw_newt(j); return ["Z" + simple_string_tup((j/10) mod 5 + 1)(1..(j mod 10) + 1),1]; end raw_newt; -- leaf creator procedure raw_newtbg(j); return ["U" + simple_string_tup((j/10) mod 5 + 1)(1..(j mod 10) + 1),1]; end raw_newtbg; -- leaf creator procedure str_len_cum_tests; -- tests of B-tree operations for record occurence trees tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); for j in [1..150] loop wix_insert(tree,OM,newt(j)); -- insert leaf into tree end loop; print("check_consistency - insertions at end: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("memory check 01: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); --print(cm2 := cm(2)); --for r in cm2 loop print(hexify(dr_load(stg_of_4(r)))); end loop; tree := wix_make_from_tuple([raw_newt(j): j in [1..50]] + [raw_newtbg(j): j in [1..50]]); print(str(wix_dump(tree))); print("check_consistency: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("memory check 00: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); print("small_tree: ",str(wix_dump(small_tree))); for j in [1..10] loop wix_insert(small_tree,1,newt(j)); -- insert leaf at start of small tree -- print("small_tree after insertion: ",j," ",str(wix_dump(small_tree))); end loop; print("small_tree after 10 insertions: ",str(wix_dump(small_tree))); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("memory check 0: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print("tree: ",str(wix_dump(tree))); for j in [1..10] loop wix_set_comp(tree,j,newt(j)); end loop; print("tree after changes: ",str(wix_dump(tree))); for j in [1..10] loop wix_set_comp(tree,1,OM); end loop; print("tree after 10 deletions: ",str(wix_dump(tree))); print("check_consistency: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("memory check 0A: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..150] loop wix_insert(small_tree,1,newt(j)); -- insert leaf at start of small tree -- print("start insertion: ",j," ",str(wix_dump(small_tree))); end loop; print("check_consistency: ",check_consistency(small_tree)," ",str(wix_dump(small_tree))); incref(small_tree,-1); -- demolish the tree print("memory check 0: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); print("small_tree: ", str(wix_dump(small_tree))); print("leaves(small_tree): ",collect_leaves(small_tree)); print("wix_comp_cum: ",wix_comp_cum(small_tree,2)); print("small_tree comps: ", str([stg_and_len(wix_comp(small_tree,j)): j in [1..3]])); for j in [1..wix_get_cum(small_tree)] loop wix_set_comp(small_tree,j,newtbg(j)); end loop; print("small_tree changed: ", str(wix_dump(small_tree))); print("\nInsertions at end"); for j in [1..3] loop wix_insert(small_tree,OM,newt(j)); end loop; print(str(wix_dump(small_tree))); print("\nAdditional Insertions at start"); for j in [1..3] loop wix_insert(small_tree,1,newt(j)); end loop; print("check_consistency small_tree: ",check_consistency(small_tree)," ",str(wix_dump(small_tree))); incref(small_tree,-1); -- demolish the tree print("memory check small_tree: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1,13:1,14:1,15:1")); print("tree: ",str(wix_dump(tree))); print("tree comps: ",str([stg_and_len(wix_comp(tree,j)): j in [1..12]])); for j in [1..wix_get_cum(tree)] loop wix_set_comp(tree,j,newtbg(j)); end loop; print("tree changed: ",str(wix_dump(tree))); print("\nInsertions at end"); for j in [1..15] loop wix_insert(tree,OM,newt(j)); --print(hexify(dr_load(tree))); stop; end loop; print(str(wix_dump(tree))); print("\nAdditional Insertions at start"); for j in [1..15] loop wix_insert(tree,1,newt(j)); end loop; print("check_consistency tree: ",check_consistency(tree)," ",str(wix_dump(tree))); incref(tree,-1); -- demolish the tree print("memory check tree: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..150] loop wix_insert(small_tree,OM,newt(j)); -- insert leaf at end of small tree end loop; print("check_consistency: ",check_consistency(small_tree)," ",str(wix_dump(small_tree))); incref(small_tree,-1); -- demolish the tree small_tree := wix_make_from_tuple(breakex("MGGf:1,MGGg:1,MGGhh:1")); print("small_tree: ",str(wix_dump(small_tree))); print("components: ",collect_leaves(small_tree)," ",collect_cums(small_tree)," ",check_consistency(small_tree)); for j in [1..wix_get_cum(small_tree)] loop wix_set_comp(small_tree,j,newt(256 * j)); -- change leaf of small tree if not check_consistency(small_tree) then print("small_tree components after change: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("small_tree change test passed"); for j in [1..wix_get_cum(small_tree)] loop wix_set_comp(small_tree,1,OM); -- delete leaf of small tree if not check_consistency(small_tree) then print("small_tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("small_tree deletion test passed"); incref(small_tree,-1); -- demolish the tree print("memory check 1: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("MGGf:1,MGGg:1,MGGhh:1")); for j in [1..wix_get_cum(small_tree)] loop wix_set_comp(small_tree,OM,OM); -- delete leaf of small tree if not check_consistency(small_tree) then print("small_tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("small_tree end deletion test passed"); incref(small_tree,-1); -- demolish the tree print("memory check 2: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print("tree: ",str(wix_dump(tree))," ",wix_get_cum(tree)," ",wix_get_cum2(tree)," ",check_consistency(tree)); print("components: ",collect_leaves(tree)," ",collect_cums(tree)); for j in [1..wix_get_cum(tree)] loop wix_set_comp(tree,j,newt(j)); -- change leaf of small tree if not check_consistency(tree) then print("tree components after change: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("change test passed"); print("components after changes: ",str(wix_dump(tree))," ",check_consistency(tree)); for j in [1..wix_get_cum(tree)] loop wix_set_comp(tree,1,OM); -- delete leaf of small tree if not check_consistency(tree) then print("tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("deletion test passed"); incref(tree,-1); -- demolish the tree print("memory check 3: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); for j in [1..wix_get_cum(tree)] loop wix_set_comp(tree,OM,OM); -- delete leaf of small tree if not check_consistency(tree) then print("tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("end deletion test passed"); incref(tree,-1); -- demolish the tree print("memory check 4: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); use_history := []; for reps in [1..the_last := 15] loop tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); xtree := tree; incref(tree,1); -- save and note a second copy for j in [1..7] loop wix_set_comp(tree,1,OM); end loop; -- deletions in one copy if reps = the_last then print("tree: ",str(wix_dump(tree))); print("xtree: ",str(wix_dump(xtree))); end if; incref(tree,-1); incref(xtree,-1); -- demolish both trees use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); end loop; print("use_history A:",str(use_history)); use_history := []; for reps in [1..the_last := 15] loop small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..34] loop wix_insert(small_tree,OM,newt(j)); -- insert leaf into small tree --print("insertion: ",j," ",str(wix_dump(small_tree))); --if not check_consistency(small_tree) then stop; end if; end loop; if reps = the_last then print("small tree after insertions: ",str(wix_dump(small_tree))," ",wix_get_cum(small_tree)," ",check_consistency(small_tree)); end if; incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); end loop; print("use_history B:",str(use_history)); use_history := []; small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..3] loop wix_set_comp(small_tree,1,OM); -- delete leaf of small tree print("small tree: ",str(wix_dump(small_tree))); -- check the tree structure end loop; incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); for j in [1..10] loop small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); wix_set_comp(small_tree,1,newt(j)); -- change leaf of small tree wix_set_comp(small_tree,1,newt(j)); -- change leaf of small tree incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); end loop; print("use_history C:",str(use_history)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..3] loop wix_set_comp(small_tree,j,newt(j)); -- change leaf of small tree end loop; print("small_tree after changes: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); print("check_consistency - small_tree after changes: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); print("use_history D: ",str(use_history)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print(str(wix_dump(tree))); print("tree length is: ",wix_get_cum(tree)); print("distinct tree components w cums. are: ",str(collect_cums(tree))); print("distinct tree components are: ",str(collect_leaves(tree))); for j in [1..10] loop wix_set_comp(tree,j,newt(j)); -- change leaf of tree end loop; print("tree after changes: ",collect_cums(tree)," ",wix_get_cum(tree)); print("check_consistency - tree after changes: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history E :",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print("tree before deletion: ",str(wix_dump(tree))," ",wix_get_cum(tree)); wix_set_comp(tree,1,OM); -- delete leaf of small tree print("tree after 1 deletion: ",str(wix_dump(tree)),"\n",str(collect_cums(tree))," ",wix_get_cum(tree)); print("check_consistency - tree after 1 deletion: ",check_consistency(tree)); for j in [1..10] loop wix_set_comp(tree,1,OM); -- delete leaf of tree end loop; print("check_consistency - tree after deletions: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history F: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print("tree before deletion: ",str(wix_dump(tree))," ",wix_get_cum(tree)); wix_set_comp(tree,OM,OM); -- delete leaf of small tree print("tree after 1 deletion: ",str(wix_dump(tree))," ",wix_get_cum(tree)); print("check_consistency - tree after 1 end deletion: ",check_consistency(tree)); for j in [1..10] loop wix_set_comp(tree,OM,OM); -- delete leaf of tree end loop; print("check_consistency - tree after end deletions: ",wix_get_cum(tree)," ",check_consistency(tree)); for j in [1..150] loop wix_insert(tree,1,newt(j)); -- insert leaf into tree end loop; print("check_consistency - insertions at start: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history G: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); --incref(tree,-1); -- demolish the tree print("use_history H: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..150] loop wix_insert(small_tree,1,newt(j)); -- insert leaf into small tree end loop; print("check_consistency - small_tree insertions at start: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history I: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..150] loop wix_insert(small_tree,OM,newt(j)); -- insert leaf into small tree end loop; print("check_consistency - small_tree insertions at end: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history J: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); print(str(wix_dump(small_tree))); print("small_tree length is: ",wix_get_cum(small_tree)); print("distinct small_tree components w cums. are: ",collect_cums(small_tree)); print("distinct small_tree components are: ",collect_leaves(small_tree)); wix_insert(small_tree,1,newt(9)); -- insert leaf at start of tree print("small_tree after insertion at start: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); wix_insert(small_tree,OM,newt(9)); -- insert leaf at end of tree print("small_tree after insertion at end: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); wix_set_comp(small_tree,1,newt(666)); -- change leaf of small tree print("small_tree check: ",str(wix_dump(small_tree))); wix_set_comp(small_tree,1,newt(999)); -- change leaf of small tree print("small_tree recheck: ",str(wix_dump(small_tree))); for j in [1..3] loop wix_set_comp(small_tree,j,newt(j)); -- change leaf of small tree end loop; print("small_tree after changes: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history K: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print(str(wix_dump(tree))); print("tree length is: ",wix_get_cum(tree)); print("distinct tree components w cums. are: ",collect_cums(tree)); print("distinct tree components are: ",collect_leaves(tree)); for j in [1..10] loop wix_set_comp(tree,j,newt(j)); -- change leaf of small tree end loop; print("tree after changes: ",collect_cums(tree)," ",wix_get_cum(tree)); for j in [1..10] loop wix_set_comp(tree,1,OM); -- delete first leaf of tree end loop; print("tree after deletion: ",10," ",collect_cums(tree)," ",wix_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history LL: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); for j in [1..10] loop wix_set_comp(tree,OM,OM); -- delete last leaf of tree end loop; print("tree after end deletion: ",10," ",collect_cums(tree)," ",wix_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history L: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); wix_insert(tree,1,newt(1)); -- insert leaf at start of tree print("tree after insertion at start: ",collect_cums(tree)," ",wix_get_cum(tree)); wix_insert(tree,OM,newt(2)); -- insert leaf at end of tree print("tree after insertion at end: ",collect_cums(tree)," ",wix_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history M: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..150] loop wix_insert(small_tree,1,newt(j)); -- insert leaf into small tree end loop; print("check_consistency: ",check_consistency(small_tree)); for j in [1..140] loop --if j > 84 then print("deletion: ",j,str(wix_dump(small_tree))); end if; -- wix_set_comp(small_tree,1,OM); -- delete first leaf of tree end loop; print("small_tree after re_deletion at start: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history N: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); for j in [1..150] loop wix_insert(small_tree,OM,newt(j)); -- insert leaf into small tree end loop; print("check_consistency: ",check_consistency(small_tree)); for j in [1..140] loop wix_set_comp(small_tree,OM,OM); -- delete last leaf of tree end loop; print("small_tree after re_deletion at end: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history O: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); print("last element: ",stg_and_len(wix_comp_cum(small_tree,OM))); print("check_consistency: ",check_consistency(small_tree)); print("small_tree cums: "); print("small_tree leaves with cumulants: ",str([string_and_cum(wix_comp_cum(small_tree,j)): j in [1..wix_get_cum(small_tree)]])); for j in [1..3] loop wix_set_comp(small_tree,1,OM); -- delete first leaf of small tree print("small_tree after deletion: ",j," ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); end loop; for j in [1..34] loop wix_insert(small_tree,OM,newt(j)); -- insert leaf into small tree end loop; print("small_tree after insertion at end: ",collect_cums(small_tree)," ",wix_get_cum(small_tree)); print("last element: ",stg_and_len(wix_comp_cum(small_tree,OM))); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history P: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); for j in [1..12] loop wix_set_comp(tree,OM,OM); end loop; -- delete last leaf of tree print("tree after right deletions: ",collect_cums(tree)," ",wix_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history Q: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); for j in [1..12] loop wix_set_comp(tree,1,OM); end loop; -- delete last leaf of tree print("tree after left deletions: ",collect_cums(tree)," ",wix_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history R: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); for j in [1..12] loop wix_set_comp(tree,j,newt(j)); -- change j-th leaf of tree end loop; print("tree after changes: ",collect_cums(tree)," ",wix_get_cum(tree)); print(); print("check_consistency: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history S: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); print("big_tree cums"); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print("tree leaves with cumulants: ",str([string_and_cum(wix_comp_cum(tree,j)): j in [1..wix_get_cum(tree)]])); print("tree leaves (with reps): ",str([stg_and_len(wix_comp(tree,j)): j in [1..wix_get_cum(tree)]])); incref(tree,-1); -- demolish the tree print("use_history S: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); print("*********** initial_tests ***********"); initial_tests; -- initial tests for B-trees with cumulators print("*********** deletion_tests ***********"); deletion_tests; -- test the deletion operations for B-trees with cumulators print("*********** insertion_tests ***********"); insertion_tests; -- test the insertion operations for B-trees with cumulators end str_len_cum_tests; procedure str_len(stg); return [wix_length(stg)]; end str_len; -- string length function, in a unit tuple procedure collect_leaves(t); -- collect the distinct leaves of a tree if num_childr(t) = 0 then return "[]"; end if; -- WORKAROUND FOR FOLLOWING LINE ***** return str([stg_and_len(wix_comp(t,j)): j in [1..wix_get_cum(t)] | (j = 1 or wix_comp(t,j) /= wix_comp(t,j - 1))]); end collect_leaves; procedure collect_cums(t); -- collect the distinct leaves of a tree if num_childr(t) = 0 then return "[]"; end if; -- WORKAROUND FOR FOLLOWING LINE ***** return str([string_and_cum(wix_comp_cum(t,j)): j in [1..wix_get_cum(t)] | (j = 1 or wix_comp(t,j) /= wix_comp(t,j - 1))]); end collect_cums; procedure string_and_cum(rno_cum); -- convert a string node recno to the corresponding string [rec,rec_len,the_cum] := rno_cum; return [rec + ":" + str(rec_len),the_cum]; end string_and_cum; procedure stg_and_len(s_a_l); -- convert a rid-and-length to readable format [s,l] := s_a_l; return s + ":" + str(l); -- put the rid into shex format end stg_and_len; procedure breakex(stg); -- convert comma-separated string of ints into tuple of 4-byte recodrd ids t := []; for [wd,occslen] in breakup(breakup(stg,","),":") loop reads(occslen,int_occslen); t with:= [wd,int_occslen]; end loop; return t; end breakex; procedure initial_tests; -- initial tests for B-trees with cumulators small_tree := wix_make_from_tuple(breakex("abc:1,def:1,GGG:1")); print("small_tree - a.bb.ccc: ",str(wix_dump(small_tree))); print("small_tree leaf 1 and length: ",stg_and_len(wix_comp(small_tree,1))); print("small_tree leaves (with reps.) and length: ",[stg_and_len(wix_comp(small_tree,j)): j in [1..wix_get_cum(small_tree)]]); print("small_tree leaves (no reps.): ",collect_leaves(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history T: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); print("tree leaves: ",str(wix_dump(tree))); print("tree leaf 1 and length: ",stg_and_len(wix_comp(tree,1)), " " ,wix_get_cum(tree)); print("12 tree leaves (with reps.): ",str([stg_and_len(wix_comp(tree,j)): j in [1..12]])); print("tree leaves (no reps.): ",collect_leaves(tree)); wix_set_comp(tree,1,OM); -- delete the first element print("first element of compound tree deleted: ",wix_get_cum(tree)," ",str(collect_cums(tree))); print("check_consistency: ",check_consistency(tree)); wix_set_comp(tree,2,newt(666)); print("'666' inserted into element of compound tree: ",str(wix_dump(tree))); incref(tree,-1); -- demolish the tree print("use_history T: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); print("Iterative check; changes in successive positions"); all_ok := true; tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree for j in [1..10] loop wix_set_comp(tree,j,newt(j)); if not check_consistency(tree) then print("IC1 - iteration ",j," error ",wix_dump(tree)); print(str(wix_dump(tree))); for mm in [1..wix_get_cum(tree)] loop print(mm,": ",wix_comp(tree,mm)); end loop; all_ok := false; stop; end if; --print("CC1 - iteration ",j," ",str(wix_dump(tree))); --print(str(wix_dump(tree))); end loop; if all_ok then print("Iterative change test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history U: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end initial_tests; procedure deletion_tests; -- test the deletion operations for B-trees with cumulators print("Iterative check; deletions in successive positions"); all_ok := true; tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree for j in [1..10] loop wix_set_comp(tree,j,OM); if not check_consistency(tree) then print("IC1 - iteration ",j," error ",wix_dump(tree)); print(str(wix_dump(tree))); for mm in [1..wix_get_cum(tree)] loop print(mm,": ",tree(mm)); end loop; all_ok := false; stop; end if; incref(tree,-1); -- demolish the tree --print("IC1 - iteration ",j," ",str(wix_dump(tree))); --print(str(wix_dump(tree))); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree end loop; if all_ok then print("First deletion test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history V: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree all_ok := true; print("Iterative check; deletions at start"); for j in [1..10] loop wix_set_comp(tree,1,OM); -- delete the first tree element if not check_consistency(tree) then print("IC2 - iteration ",j," error ",wix_dump(tree)); print(str(wix_dump(tree))); all_ok := false; stop; end if; -- print("IC2 - iteration ",j," ",str(wix_dump(tree))); end loop; if all_ok then print("Second deletion test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history W: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree all_ok := true; print("Iterative check; deletions at end"); for j in [1..10] loop wix_set_comp(tree,wix_get_cum(tree),OM); -- delete the last tree element if not check_consistency(tree) then print("IC3 - iteration ",j," error ",wix_dump(tree)); print("#tree is: ", wix_get_cum(tree)); print(str(wix_dump(tree))); all_ok := false; stop; end if; -- print("IC3 - iteration ",j," ",str(wix_dump(tree))); end loop; if all_ok then print("Third deletion test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history X: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end deletion_tests; procedure insertion_tests; -- test the insertion operations for B-trees with cumulators all_ok := true; print("Iterative check; insertions after end, starting with null tree"); tree := wix_make_from_tuple(breakex("1:1")); wix_set_comp(tree,1,OM); for j in [1..10] loop wix_insert(tree,wix_get_cum(tree) + 1,newt(j)); -- make the insertion --print("After CUM_INS0 - iteration ",j," ",str(wix_dump(tree))); --print(str(wix_dump(tree))); if (not check_consistency(tree)) then print("CUM_INS0 - iteration ",j," error ",lo," ",le); print(str(wix_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion starting with null tree test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history XX: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree all_ok := true; print("Iterative check; insertions after end"); all_ok := true; for j in [0..9] loop wix_insert(tree,wix_get_cum(tree) + 1,newt(j)); -- make the insertion -- print("After CUM_INS1 - iteration ",j," ",str(wix_dump(tree))); if not check_consistency(tree) then print("CUM_INS1 - iteration ",j," error ",wix_dump(tree)); print(str(wix_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion-at-end test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history Y: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree all_ok := true; print("Iterative check; insertions at start"); all_ok := true; for j in [0..9] loop wix_insert(tree,1,newt(j)); -- make the insertion --print("After CUM_INS2 - iteration ",j," ",str(wix_dump(tree))); --print(str(wix_dump(tree))); if not check_consistency(tree) then print("CUM_INS2 - iteration ",j," error ",wix_dump(tree)); print(str(wix_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion-at-start test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history Z: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wix_make_from_tuple(breakex("abc:1,def:1,GGa:1,GGb:1,GGc:1,GGd:1,GGe:1,GGGf:1,GGGg:1,GGGhh:1,GGGJ:1,GGGK:1")); -- re-initialize the tree all_ok := true; print("Iterative check; insertions after second element"); for j in [0..9] loop wix_insert(tree,3,newt(j)); -- make the insertion print("After CUM_INS3 - iteration ",j," ",str(wix_dump(tree))); --print(str(wix_dump(tree))); if not check_consistency(tree) then print("CUM_INS3 - iteration ",j," error ",wix_dump(tree)); print(str(wix_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion-after-second test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history A1: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end insertion_tests; procedure check_consistency(the_tree); -- consistency check for big_string trees if num_childr(the_tree) = 0 then return true; end if; if not wix_check_tree_structure(the_tree) then return false; end if; -- check that vector of leaves obtained directly is also obtained by collecting individual components if (lo := str(leaves_only(wix_dump(the_tree)))) /= (le := collect_leaves(the_tree)) then print("FAILURE: leaf discrepancy"); print(lo," ",str(le)," ",str(wix_dump(the_tree))); return false; end if; if num_childr(the_tree) = 0 then return true; end if; -- no cumulant check for empty tree -- check that cumulants advance properly if exists n in [2..wix_get_cum(the_tree)] | (((tn := wix_comp_cum(the_tree,n)) /= (tnm12 := wix_comp_cum(the_tree,n - 1))) and (tn(3) /= (tnm12(3) + tn(2)))) then print("FAILURE: first cumulant bad at position ",n," ",string_and_cum(tn)," ",string_and_cum(tnm12)," ",decode_wd(tn(1))); return false; end if; -- check that the first cumulant is good if (cc1 := wix_comp_cum(the_tree,1))(3) /= cc1(3) then print("FAILURE: first node cumulant in position 1 is not leaf length"); return false; end if; return true; end check_consistency; procedure leaves_only(tup); return [c: c in tup | ("(" notin c) and (")" notin c)]; end leaves_only; end test;