DB_btree_wdoc.stl
"B-tree variant for word occurence_strings, with 2 cumulants in parent, refcounts."
-- file B_tree_for_bigs_wdoc.stl package B_tree_for_wdocstring; -- B-trees, realized as objects -- *********************************************************************************************** -- ****** B-tree variant for word occurence_strings, with 2 cumulants in parent, refcounts ****** -- *********************************************************************************************** -- Refcounts can be ignored except in the wo_set_comp, wo_insert, split_node, share_right, join_left, -- and join_left routines. -- this B-tree structures the wdoccs_big_string which holds word occurences const wo_code_pts := {"wo_last", "wo_nlast2", "wo_nlast1", "wo_nc1", "wo_nc2", "wo_fpbot", "wo_fpsame", "wo_fpinfirst", "wo_fpinmid", "wo_fpinlast", "wo_set_copy", "wo_set_last1", "wox_set_last1", "wo_set_last2", "wox_set_last2", "wo_set_nlast1", "wo_set_nlast2", "wo_set_ndel_nc", "wo_set_ndel_comp", "wo_set_ndel_end", "wo_set_del_nc", "wo_set_del_have", "wo_set_del_comp", "wo_set_del_comp_end", "wo_set_del_enough", "wo_set_del_canpull", "wo_set_del_canjoin", "wo_set_enough", "wo_set_cutback", "wo_in_copy", "wo_in_nend2", "wo_in_nend1", "wo_in_nend3", "wo_in_end3", "wo_in_endnc", "wo_in_ncnosplit", "wo_in_ncsplit", "wo_in_endcomp", "wo_in_endcomp_nos", "wo_in_endcomp_split", "wo_in_endcomp_nosthis", "wo_in_endcomp_splitthis", "wo_in_nendnc_nos", "wo_in_nendnc_split", "wo_in_nendcomp_nos", "wo_in_nendcomp_split", "wo_in_nendcomp_nosthis", "wo_in_nendcomp_splitthis", "wo_pull_left", "wo_pull_right", "wo_share_copy", "wo_share_copy2", "wo_share_move_left", "wo_share_move_right", "wo_jleft_copy", "wo_jleft", "wo_jright_copy", "wo_jright"}; -- code points to be traversed var debug_flag := false; var prior_debug_c := 0,debug_c := 0; -- global variables for debugging var tree_level := 0; -- for tracking tree level during insertion --procedure wo_create(); --procedure wo_get_cum(rec); --procedure wo_get_cum2(rec); --procedure wo_comp(rec,j); --procedure wo_comp2(rec,j); --procedure wo_comp_cum(rec,x); --procedure wo_comp_cum2(rec,x); procedure wo_first_past_to(rec,key,beg,ennd); -- search for a key in the indicated range --procedure wo_set_comp(rw rec,w,x); -- assignment of the first component whose cumulant is at least w; w must be in range --procedure wo_set_comp2(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w --procedure wo_insert(rw rec,j,x); -- insertion before j-th component; or at the end if j = OM --procedure wo_insert2(rw rec,j,x); -- insertion before component with second cumulant at least j; or at end (j = OM) procedure wo_dump(rec); -- get tuple from B-tree representation (DEBUGGING ONLY) procedure show_hex(occstg); -- show an occurences string as comma-separated hex (DEBUGGING ONLY) procedure shexify(stg); -- put into abbreviated hex (DEBUGGING ONLY) procedure wo_check_tree_structure(tree); -- recursive check of tree structure (DEBUGGING ONLY) end B_tree_for_wdocstring; package body B_tree_for_wdocstring; -- B-trees, realized as objects use setldb,byteutil,disk_records_pak,db_records,string_utility_pak; -- there are two cumulants, the total number of occurences and the corresponding record id procedure get_ch_cum(rec,j); -- get the integer cum value for the j-th child of this node return int_of_5(dr_load(rec)(wo_cum_start + (j - 1) * 5..wo_cum_start + j * 5 - 1)); end get_ch_cum; procedure hwo_get_cum(rec); -- get the integer cum value of the fina l child of this node return get_ch_cum(rec,num_childr(rec)); end hwo_get_cum; procedure hwo_get_cum2(rec); -- get the record id string cum value of the final child of this node return get_ch_cum2(rec,num_childr(rec)); end hwo_get_cum2; procedure set_ch_cum(rec,j,cum_int); -- set the integer cum value for the j-th child of this node stg := dr_load(rec); -- make sure that this record is loaded stg(wo_cum_start + (j - 1) * 5..wo_cum_start + j * 5 - 1) := stg_of_5(cum_int); dr_setrecbuf(rec,stg); dr_dirtify(rec); end set_ch_cum; procedure get_ch_cum2(rec,j); -- get the record id string cum value for the j-th child of this node return dr_load(rec)(wo_cum2_start + (j - 1) * 4..wo_cum2_start + j * 4 - 1); end get_ch_cum2; procedure hwo_create(); -- creation rout ine rec := dr_new_rec(); -- create a new record (at this point, the record is loaded -- dirty, and ho set_type(rec,wdoccs_str_node_ncr); -- set to non-compound tree return rec; end hwo_create; procedure set_ch_cum2(rec,j,new_rec_id); -- set the record id string cum value for the j-th child of this node stg := dr_load(rec); -- make sure that this record is loaded stg(wo_cum2_start + (j - 1) * 4..wo_cum2_start + j * 4 - 1) := new_rec_id; dr_setrecbuf(rec,stg); dr_dirtify(rec); end set_ch_cum2; procedure vect_of_children(rec); -- gets vector of children, as a string of 4-byte record numbers nch := num_childr(rec); -- number of children return dr_load(rec)(wo_ch_start..wo_ch_start - 1 + nch * 4); end vect_of_children; procedure set_vect_of_children(rec,stg); -- sets vector of children, from a string of 4-byte record numbers --print("set_vect_of_children: ",hexify(rec)," ",hexify(stg)); set_num_childr(rec,(nstg := #stg)/4); -- set the number of children (also loads) missing := wo_cum_start - wo_ch_start - #stg; --if missing < 0 then print("missing,wo_cum_start,wo_ch_start,#stg: ",missing," ",#stg," ",hexify(stg)); stop; end if; stg2:=dr_load(rec); stg2(wo_ch_start..wo_cum_start - 1) := (stg + missing * "\x00"); -- set the children, remembering not to change the length of the string section containing them dr_setrecbuf(rec,stg2); dr_dirtify(rec); end set_vect_of_children; procedure vect_of_cums(rec); -- gets vector of cums, as a string of 5-byte fields nch := num_childr(rec); -- number of children return dr_load(rec)(wo_cum_start..wo_cum_start - 1 + nch * 5); end vect_of_cums; procedure set_vect_of_cums(rec,stg); -- sets vector of cumulants, from a string of 5-byte record numbers missing := wo_cum2_start - wo_cum_start - #stg; stg2:=dr_load(rec); -- force load stg2(wo_cum_start..wo_cum2_start - 1) := (stg + missing * "\x00"); -- set the cumulants, remembering not to change the length of the string section containing them dr_setrecbuf(rec,stg2); dr_dirtify(rec); end set_vect_of_cums; procedure vect_of_cums2(rec); -- gets vector of cums, as a string of 4-byte fields nch := num_childr(rec); -- number of children return dr_load(rec)(wo_cum2_start..wo_cum2_start - 1 + nch * 4); end vect_of_cums2; procedure set_vect_of_cums2(rec,stg); -- sets vector of children, from a string of 4-byte record numbers missing := rec_size + 1 - wo_cum2_start - #stg; stg2:=dr_load(rec); -- force load stg2(wo_cum2_start..rec_size) := (stg + missing * "\x00"); -- set the children, remembering not to change the length of the string section containing them dr_setrecbuf(rec,stg2); dr_dirtify(rec); end set_vect_of_cums2; procedure voc(rec,j); -- n'th member of vector of children stg := dr_load(rec); -- load this string cjstrt := (j - 1) * 4 + wo_ch_start; return stg(cjstrt..cjstrt + 3); -- return child rec end voc; procedure set_voc(rec,j,chrec); -- set n'th member of vector of children cjstrt := (j - 1) * 4 + wo_ch_start; stg2:=dr_load(rec); -- force load; then set the first character of the record stg2(cjstrt..cjstrt + 3) := chrec; dr_setrecbuf(rec,stg2); dr_dirtify(rec); end set_voc; procedure wo_dump(rec); -- get tuple from nested representation -- is compound representation or direct, depending on whether 'is_compound' flag is set -- note that this shows the vector of components, without their cumulants -- we ignore the cumulants in the tree nodes var indent := 0; return wo_dump_in(rec); -- call inner workhorse procedure wo_dump_in(rec); -- inner workhorse if dr_is_compound(rec) then -- compound case indent +:= 1; nc := num_childr(rec); stg := (["\n" + (indent * " ") + "("] +/ [wo_dump_in(voc(rec,j)): j in [1..nc]]) + ["\n" + (indent * " ")+ "[" + str(nc) + "]" + str(wo_get_cum(rec)) + "," + shexify(wo_get_cum2(rec)) + ")"]; indent -:= 1; return stg; end if; -- done with compound case t := []; for j in [1..num_childr(rec)] loop chj := voc(rec,j); svj := if (lvj := wo_length(chj)) > 0 then show_hex(wo_slice(chj,1,lvj)) else "" end if; t with:= svj; end loop; return ["\n" + (indent * " ") + "("] + t + [str(wo_get_cum(rec)) + "," + shexify(wo_get_cum2(rec)) + ")"]; end wo_dump_in; end wo_dump; procedure show_hex(occstg); -- show an occurences string as comma-separated hex return "" +/ [shexify(occstg(j..j + 3)) + if j > #occstg - 4 then "" else "," end if: j in [1,5..#occstg]]; end show_hex; procedure shexify(stg); -- put into abbreviated hex ns := #(stg := hexify(stg)); zers := span(stg,"0"); return if ns = 8 then ":" + stg else ":" + stg + ":" + str(#zers) end if; end shexify; procedure hwo_comp(rec,j); -- fetch of component containing cumulant j return comp_cum_ix(rec,1,j)(1); end hwo_comp; procedure hwo_comp2(rec,j); -- fetch of component containing second cumulant j return comp_cum_ix(rec,2,j)(1); end hwo_comp2; procedure hwo_comp_cum(rec,x); -- fetch of x-th component, with cumulan ts return comp_cum_ix(rec,1,x); end hwo_comp_cum; procedure hwo_comp_cum2(rec,x); -- fetch of component containing second cumulant x, with cumulants return comp_cum_ix(rec,2,x); end hwo_comp_cum2; procedure comp_cum_ix(rec,srch_on,x); -- fetch of x-th component by search on specified cumulant -- component should be found by binary search -- search for the first index component with cumulant past -- the specified x; Return OM if there is none such. -- this returns the triple [leaf,int_cum_of_leaf,second_cum_of_leaf] -- NOTE: this should be by binary search -- find the first node for which a cumulant >= j if x = OM then -- want last node pass("wo_last"); j := num_childr(rec); the_cum := if srch_on = 1 then get_ch_cum(rec,j) else get_ch_cum2(rec,j) end if; else if srch_on = 2 then iofx := int_of_4(x); pass("wo_nlast2"); else pass("wo_nlast1"); end if; if not (exists j in [1..num_childr(rec)] | if srch_on = 1 then (the_cum := get_ch_cum(rec,j)) >= x else int_of_4(the_cum := get_ch_cum2(rec,j)) >= iofx end if) then --print("******** return OM: ",x," ",nc := num_childr(rec)," ",wo_dump(rec)," ",if nc > 0 then get_ch_cum(rec,nc) else "NONE" end if); stop; return OM; end if; end if; -- desired node not found if not dr_is_compound(rec) then -- if node is not compound we have what we want if srch_on = 1 then pass("wo_nc1"); return [voc(rec,j),the_cum,get_ch_cum2(rec,j)]; else pass("wo_nc2"); return [voc(rec,j),get_ch_cum(rec,j),the_cum]; end if; end if; if srch_on = 1 then prev_cum := if j = 1 then 0 else get_ch_cum(rec,j - 1) end if; end if; -- the preceding cumulant (needed in the integer case only) res := comp_cum_ix(voc(rec,j),srch_on,if x = OM or srch_on = 2 then x else x - prev_cum end if); -- continue the search recursively [rec_no,retcum,retcum2] := res; return [rec_no] + [prev_cum + retcum,retcum2]; end comp_cum_ix; procedure wo_first_past_to(rec,key,beg,ennd); -- search for a key in the indicated range -- this routine returns the pair [first_past_key,its_ix] stg := dr_load(rec); -- read contents of the record -- if the node is a wdoccs_string_record we search for the key directly if stg(type_byte) = wdoccs_string_record then pass("wo_fpbot"); ikey := int_of_4(key); -- convert to integer for purposes of comparison if exists j in [beg..ennd min wo_length(rec)] | int_of_4(wos := wo_slice(rec,j,j)) >= ikey then return [wos,j]; -- (this should be a binary search) end if; return OM; -- otherwise the desired key cannot be found end if; -- otherwise find the children containing the start and end leaves of the specified range must := (exists ixbeg in [1..ncr := num_childr(rec)] | get_ch_cum(rec,ixbeg) >= beg); if not must then print("** INDICATED STARTING POINT NOT PRESENT IN OCCURENCE VECTOR ** : ncr,beg:", ncr,beg); stop; end if; must := (exists ixend in [ixbeg..ncr] | get_ch_cum(rec,ixend) >= ennd); if not must then print("** INDICATED ENDING POINT NOT PRESENT IN OCCURENCE VECTOR ** ncr,ennd:", ncr,ennd); stop; end if; prev_cum := if ixbeg = 1 then 0 else get_ch_cum(rec,ixbeg - 1) end if; if ixbeg = ixend then -- both children are the same; search down recursively pass("wo_fpsame"); if (res := wo_first_past_to(voc(rec,ixbeg),key,beg - prev_cum,ennd - prev_cum)) = OM then return OM; -- the desired key cannot be found end if; [first_past_key,its_ix] := res; return [first_past_key,its_ix + prev_cum]; end if; -- otherwise search the first node to its end, then the middle nodes, then the last if (res := wo_first_past_to( first_ch := voc(rec,ixbeg),key,beg - prev_cum,get_ch_cum(rec,ixbeg) - prev_cum)) /= OM then -- desired key is found in first child [first_past_key,its_ix] := res; pass("wo_fpinfirst"); return [first_past_key,its_ix + prev_cum]; end if; ikey := int_of_4(key); -- convert to integer for purposes of comparison if exists j in [ixbeg + 1,ixend - 1] | int_of_4(get_ch_cum2(rec,j)) >= ikey then -- have found appropriate child -- (should be binary search) chj := voc(rec,j); chj_len := (get_ch_cum(rec,j) - (prev_cum := get_ch_cum(rec,j - 1))); [first_past_key,its_ix] := wo_first_past_to(chj,key,1,chj_len); pass("wo_fpinmid"); return [first_past_key,its_ix + prev_cum]; end if; prev_cum := get_ch_cum(rec,ixend - 1); -- get the previous cum if (res := wo_first_past_to(voc(rec,ixend),key,1,ennd - prev_cum)) /= OM then -- desired key is found in last child [first_past_key,its_ix] := res; pass("wo_fpinlast"); return [first_past_key,its_ix + prev_cum]; end if; return OM; -- otherwise the desired key cannot be found end wo_first_past_to; procedure hwo_set_comp(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w set_comp_ix(rec,1,w,x); end hwo_set_comp; procedure hwo_set_comp2(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w set_comp_ix(rec,2,w,x); end hwo_set_comp2; procedure set_comp_ix(rw rec,srch_on,w,x); -- assignment of the first component whose specified cumulant is at least w; w must be in range -- we must first copy rec if its refcount is greater than 1, and transfer one reference -- from its old to its copied version --print("set_comp_ix: ",wo_dump(rec)); if refcount(int_of_4(rec)) > 1 then -- must copy pass("wo_set_copy"); stg := dr_load(rec); new_r := dr_new_rec();dr_setrecbuf(new_r,stg); dr_dirtify(new_r); increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original end if; ic := dr_is_compound(rec); if srch_on = 2 then wint := int_of_4(w); end if; ncr := num_childr(rec); if w = OM then -- make change in last node if srch_on = 1 then pass("wo_set_last1"); w := get_ch_cum(rec,ix := ncr); else pass("wo_set_last2"); w := get_ch_cum2(rec,ix := ncr); end if; if ix = 0 then print("Deletion or change at end of empty tree is not allowed."); stop; end if; elseif not (exists ix in [1..ncr] | if srch_on = 1 then get_ch_cum(rec,ix) >= w else int_of_4(get_ch_cum2(rec,ix)) >= wint end if) then if srch_on = 1 then pass("wox_set_last1"); w := get_ch_cum(rec,ix := ncr); else pass("wox_set_last2"); w := get_ch_cum2(rec,ix := ncr); end if; if ix = 0 then print("Deletion or change at very end of empty tree is not allowed."); stop; end if; end if; -- get the local cumulant of the child if srch_on = 1 then old_cum := if ic then wo_get_cum(voc(rec,ix)) else wo_length(voc(rec,ix)) end if; -- find the last cumulant value of voc(rec,ix), or the length of active part of voc(rec,ix) pass("wo_set_nlast1"); else pass("wo_set_nlast2"); old_cum := if ic then wo_get_cum2(voc(rec,ix)) else wo_slice(nd,wol := wo_length(nd),wol) end if; -- find the last cumulant value of voc(rec,ix), or the last occurence in voc(rec,ix) end if; --print("old_cum: ",old_cum," ",hexify(dr_load(voc(rec,ix)))); if srch_on = 1 then prev_cum := if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if; end if; if x /= OM then -- we are not dealing with a deletion if not ic then -- non-compound case; we must add the change in the leaf cumulant -- to the cumulant of all following nodes. old_x := voc(rec,ix); set_voc(rec,ix,x); -- change the component xfref(old_x,x); -- transfer reference from old to new component cum_change := wo_length(x) - old_cum; update_cums(rec,ix,cum_change); -- update the cums,starting with the given child set_ch_cum2(rec,ix,wo_slice(x,wol := wo_length(x),wol)); -- modify the cum of the changed component. Note that the following -- second cumulants need not be changed. However, if ix references the -- final child of rec, a cum2 entry in the parent of rec may need to be changed below. pass("wo_set_ndel_nc"); return; -- done with this case end if; -- otherwise we have the compound case pass("wo_set_ndel_comp"); the_child := voc(rec,ix); -- the next operation may copy the child wo_set_comp(the_child,if srch_on = 1 then w - prev_cum else w end if,x); -- make the change in the child cum_change := wo_get_cum(the_child) - old_cum; set_voc(rec,ix,the_child); -- put the revised, possibly copied child back into position -- the 'update_cums' procedure which we call now must then start with the (properly set) -- cumulant of the preceding node, and then left_add the change in the the cumulant -- to the cumulant of this child to all the subsequent children. This assumes that -- (new_d - old_d) + c + old_d + e = c + new_d + e -- for all cumulant values. This is obviously true for values using associative-commutative -- cumulator functions with an inverse, and also in the (string) case where a + b = b. update_cums(rec,ix,cum_change); -- update the cums,starting with the given child -- the final cumulant of the_child might have been changed by -- the preceding wo_set_comp(the_child,..) operation; see preceding comment. if ix = ncr then set_ch_cum2(rec,ix,wo_get_cum2(the_child)); pass("wo_set_ndel_end"); end if; --print("compound self,ix: ",wo_get_cum()," ",ix); return; -- done with the non-deletion case end if; -- otherwise we are dealing with a deletion if not dr_is_compound(rec) then -- non-compound case the_ch := vect_of_children(rec); the_child := the_ch(4 * ix - 3..4 * ix); -- capture the child before removal incref(the_child,-1); -- drop one reference the_ch(4 * ix - 3..4 * ix) := ""; pass("wo_set_del_nc"); if #the_ch > 0 then -- need not delete cumulant if no remaining children stg := vect_of_cums(rec); -- get the vector of cums stg((ix - 1) * 5 + 1..ix * 5) := ""; -- drop one element set_vect_of_cums(rec,stg); -- put back into place stg := vect_of_cums2(rec); -- get the second vector of cums stg((ix - 1) * 4 + 1..ix * 4) := ""; -- drop one element set_vect_of_cums2(rec,stg); -- put back into place pass("wo_set_del_have"); end if; -- delete the child from the string of descendants set_vect_of_children(rec,the_ch); -- delete the ix-th node -- delete the cumulant of the ix-th node. note that this has already been loaded cum_change := -old_cum; update_cums(rec,ix,cum_change); -- update the cums,starting with the appropriate child return; -- done with this case end if; -- otherwise we are dealing with a deletion in a compound case the_child := voc(rec,ix); -- get the child old_ch_leafsum := wo_get_cum(old_child := the_child); wo_set_comp(the_child,w - prev_cum,OM); -- make the deletion in the child new_ch_leafsum := wo_get_cum(the_child); set_voc(rec,ix,the_child := the_child); -- re-insert the possibly modified child cum_change := new_ch_leafsum - old_ch_leafsum; update_cums(rec,ix,cum_change); -- update the cums,starting with the given child -- since the last child may have been deleted, we also need to update the second cum pass("wo_set_del_comp"); if ix = ncr then set_ch_cum2(rec,ix,wo_get_cum2(the_child)); pass("wo_set_del_comp_end"); end if; -- now it is possible that the child has lost enough children to have fallen below the required wo_low_lim -- if this has happened, we attempt to share or join children with one of the adjacent siblings if num_childr(the_child) >= wo_low_lim then pass("wo_set_del_enough"); return; end if; -- otherwise try to join or share if pull_from_left(rec,ix) or pull_from_right(rec,ix) then pass("wo_set_del_canpull"); return; end if; if join_with_left(rec,ix) or join_with_right(rec,ix) then pass("wo_set_del_canjoin"); null; end if; -- In the 'join' case, either the left or the right join must work, -- since in this compound case we must have at least one sibling. -- but we must check to see if the node being processed has fallen to -- just one child, and if it has, replace it by its single child. if num_childr(rec) > 1 then pass("wo_set_enough"); return; end if; pass("wo_set_cutback"); ch_rec := dr_load(the_ch := voc(rec,1)); dr_load(rec); dr_setrecbuf(rec,ch_rec); dr_dirtify(rec); set_vect_of_children(the_ch,""); -- the children have all moved incref(the_ch,-1); -- child data is inherited from single child, which loses a reference end set_comp_ix; procedure update_cums(rec,ix,cum_change); -- update the cums of this tree,starting with the given child -- now add this difference to all subsequent children for j in [ix..nvc := num_childr(rec)] loop set_ch_cum(rec,j,cum_change + get_ch_cum(rec,j)); end loop; end update_cums; procedure cumulate(rec); -- initalize the cumulants of a node whose children are either leaves or already initialized --print("CUMULATE!"); ic := dr_is_compound(rec); -- determine if compound the_cum := 0; for j in [1..num_childr(rec)] loop nd := voc(rec,j); -- get the j-th child --print("Child ",j," = ",int_of_4(nd)); nd_cum := if ic then wo_get_cum(nd) else wo_length(nd) end if; --print("nd_cum=",nd_cum); -- cumulant of the final child of the subnode, or occurence string length the_cum := the_cum + nd_cum; set_ch_cum(rec,j,the_cum); -- update the child's cumulant value nd_cum2 := if ic then wo_get_cum2(nd) else wo_slice(nd,wol := wo_length(nd),wol) end if; --print(int_of_4(nd_cum2)); -- cumulant2 of the final child of the subnode, or last element of occurence string set_ch_cum2(rec,j,nd_cum2); -- update the child's cumulant value end loop; end cumulate; procedure hwo_insert(rw rec,j,x); -- insertion before j-th component; or at the end if j = OM insert(rec,1,j,x); end hwo_insert; procedure hwo_insert2(rw rec,j,x); -- insertion before component with second cumulant at least j; insert(rec,2,j,x); -- or at the end if j = OM end hwo_insert2; procedure insert(rw rec,srch_on,j,x); -- insertion before j-th component with at least specified cumulant; tree_level := 0; -- note that we are at the top of the tree insert_in(rec,srch_on,j,x); -- call the inner routine end insert; procedure insert_in(rw rec,srch_on,j,x); -- insertion before j-th component with at least specified cumulant; -- or at the end if j = OM -- component should be found by binary search if refcount(int_of_4(rec)) > 1 then -- must copy pass("wo_in_copy"); stg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r); increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original end if; result := OM; -- in case desired element not found ic := dr_is_compound(rec); ncr := num_childr(rec); the_cum := wo_get_cum(rec); -- get final cumulant of this tree wol := x_cum := wo_length(x); -- cumulant value of the leaf x x_cum2 := wo_slice(x,wol,wol); -- second cumulant value of the leaf x if not ic then incref(x,1); end if; -- at the bottom level, the inserted leaf takes on an extra reference if j /= OM then -- look for target node of insertion, if any if srch_on = 2 then pass("wo_in_nend2"); iofj := int_of_4(j); else pass("wo_in_nend1"); end if; if exists ix in [1..ncr] | if srch_on = 1 then (cum := get_ch_cum(rec,ix)) >= j else int_of_4(cum := get_ch_cum2(rec,ix)) >= iofj end if then result := [ix,if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if,cum]; nd := voc(rec,ix); pass("wo_in_nend3"); else pass("wo_in_end3"); result := OM; end if; end if; if result = OM then -- we have insertion at the very end if not ic then -- simply append to vector pass("wo_in_endnc"); set_vect_of_children(rec,vect_of_children(rec) + x); -- add the cumulant of x to the present cumulant of this tree set_ch_cum(rec,nvc := ncr + 1,the_cum := the_cum + x_cum); set_ch_cum2(rec,nvc,x_cum2); --print("vect_of_children(rec): ",nvc," ",num_childr(rec)," ",ncr," ",wo_hi_lim + 1," ",hexify(vect_of_children(rec))); if tree_level > 0 or nvc <= wo_hi_lim then -- no need to split pass("wo_in_ncnosplit"); return; end if; -- otherwise we must split, and becomes compound pass("wo_in_ncsplit"); -- note that in this case we are at the very top of the tree set_vect_of_children(rec,two_halves(rec)); -- split into 2 non-compound subtrees set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum); set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2))); set_is_compound(rec,true); -- note that it is indeed compound return; -- done with this case end if; -- otherwise we have insertion at the very end of a compound vector last_child:= voc(rec,nvc := num_childr(rec)); -- get the last child tree_level +:= 1; -- go down a level insert_in(last_child,srch_on,OM,x); -- insert at the end of this last child tree_level -:= 1; -- come up a level set_voc(rec,ncr,last_child); -- insert the possibly modified child back into vect_of_children -- add the cumulant of x to the present cumulant of this tree set_ch_cum(rec,ncr,the_cum := the_cum + x_cum); set_ch_cum2(rec,ncr,x_cum2); pass("wo_in_endcomp"); if num_childr(last_child) <= wo_hi_lim then -- no need to split pass("wo_in_endcomp_nos"); return; end if; -- otherwise we must split the last child pass("wo_in_endcomp_split"); split_node(rec,nvc); -- split the nvc-th node into two. we insert an empty node to the right -- of node nvc, and then move half the children of the nvc-th node into the new node nvc := num_childr(rec); if tree_level > 0 or nvc <= wo_hi_lim then -- no need to split this node pass("wo_in_endcomp_nosthis"); return; -- done with this case end if; -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level set_vect_of_children(rec,two_halves(rec)); set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum); set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2))); pass("wo_in_endcomp_splitthis"); return; -- done with this case end if; -- end of case of insertion at the very end -- in the remaining cases we have an insertion before one of our nodes [ix,prev_cum,cum] := result; -- decode the result returned, geting the insertion result and the preceding cumulant if not ic then -- insertion at appropriate position in non-compound vector the_ch := vect_of_children(rec); the_ch(4 * ix - 3..4 * ix - 4) := x; the_cums := vect_of_cums(rec); the_cums2 := vect_of_cums2(rec); the_cums(5 * ix - 4..5 * ix - 5) := if ix = 1 then "\x00\x00\x00\x00\x00" else the_cums(5 * ix - 9..5 * ix - 5) end if; the_cums2(4 * ix - 3..4 * ix - 4) := x_cum2; set_vect_of_children(rec,the_ch); -- make insertion into list of children set_vect_of_cums(rec,the_cums); -- make insertion into list of cums set_vect_of_cums2(rec,the_cums2); -- make insertion into list of cums update_cums(rec,ix,x_cum); -- adjust the given and following cumulants if tree_level > 0 or (nvc := num_childr(rec)) <= wo_hi_lim then -- no need to split pass("wo_in_nendnc_nos"); return; -- done with this case end if; -- otherwise we must split, and becomes compound pass("wo_in_nendnc_split"); -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level set_vect_of_children(rec,two_halves(rec)); set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum); set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2))); set_is_compound(rec,true); -- note that it is indeed compound --print("just became compound: ",dr_is_compound(rec)," ",wo_dump(rec)); stop; return; -- done with this case end if; -- othewise we deal with the compound case c := voc(rec,ix); -- get the child into which the insertion will now be made -- and get the position in this child at which the insertion will be made tree_level +:= 1; -- go down a level insert_in(c,srch_on,if srch_on = 1 then j - prev_cum else j end if,x); -- insert x into this child tree_level -:= 1; -- come up a level set_voc(rec,ix,c); -- put the possibly revised child back into position update_cums(rec,ix,x_cum); -- adjust the given and following cumulants set_ch_cum2(rec,ix,wo_get_cum2(c)); -- adjust the second cumulant on record for the child -- which may have changed if the insertion was at the end of the child if #(vect_of_children(c))/4 <= wo_hi_lim then -- no need to split the child pass("wo_in_nendcomp_nos"); return; end if; -- otherwise we must split the child pass("wo_in_nendcomp_split"); split_node(rec,ix); -- split the child into two. we insert an empty node to the right -- of node ix, and then move half the nodes into it nvc := num_childr(rec); if tree_level > 0 or nvc <= wo_hi_lim then -- no need to split this node pass("wo_in_nendcomp_nosthis"); return; -- done with this case end if; -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level pass("wo_in_nendcomp_splitthis"); set_vect_of_children(rec,two_halves(rec)); set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum); set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2))); end insert_in; procedure two_halves(rec); -- split this tree into two halves -- the two nodes created share the children of the original tree, so that the refcounts of -- these children need no adjustment. The nodes created each have a refcount of 1. -- Note that this routine is only called if rec has no parent. the_type := if (ic := dr_is_compound(rec)) then wdoccs_str_node_record else wdoccs_str_node_ncr end if; u1 := dr_new_rec(); u2 := dr_new_rec(); -- make and initialize two subtrees set_type(u1,the_type); set_type(u2,the_type); -- the halves are compound iff this tree is compound -- we must subtract the cumulant of the last retained child from all the children that move -- to get the cumulant of the left node hnvc := (nvc := num_childr(rec))/2; cum_last_retained := get_ch_cum(rec,hnvc); for j in [hnvc + 1..nvc] loop -- subtract this from the cumulant of each child that will move set_ch_cum(rec,j,get_ch_cum(rec,j) - cum_last_retained); end loop; vocums := vect_of_cums(rec); -- get the vector of cums vocums2 := vect_of_cums2(rec); -- get the vector of cums set_vect_of_children(u1,(voch := vect_of_children(rec))(1..4 * hnvc)); set_vect_of_cums(u1,vocums(1..5 * hnvc)); -- u1 gets half the children and cums set_vect_of_cums2(u1,vocums2(1..4 * hnvc)); -- u1 gets half the children and cums set_vect_of_cums(u2,vocums(5 * hnvc + 1..)); -- the second half inherits adjusted cumulants from the original tree set_vect_of_cums2(u2,vocums2(4 * hnvc + 1..)); -- likewise for the second cums set_vect_of_children(u2,voch(4 * hnvc + 1..)); -- u2 gets the other half of the children --print("two_halves: ",str(wo_dump(u1)),"\n",str(wo_dump(u2)),"\n",hexify(voch(4 * hnvc + 1..)),"\n",hexify(dr_load(u2))); return u1 + u2; -- assemble the two nodes of the new compound tree; return as a string end two_halves; procedure pull_from_left(rec,k); -- split children with left sibling if k = 1 or num_childr(voc(rec,k - 1)) <= wo_low_lim then return false; end if; pass("wo_pull_left"); share_right(rec,k - 1); return true; end pull_from_left; procedure pull_from_right(rec,k); -- split children with right sibling if k >= (nrv := num_childr(rec)) or num_childr(voc(rec,k + 1)) <= wo_low_lim then return false; end if; pass("wo_pull_right"); share_right(rec,k); return true; end pull_from_right; procedure share_right(rec,k); -- share children with right-hand sibling -- we divide the children of the k-th node, together with those of the k+1'st, -- into two roughly equal groups, and make these the k-th and k+1'st nodes. The -- cumulative totals must be adjusted in the k-th node, in the -- children moved between nodes, and in the children of the k+1'st node -- this routine must first copy the nodes among which children will move, if they have more than 1 reference. -- but it does not change the number of references to the children, so that their refcounts need no adjustment nchkp1 := #(rkp1 := vect_of_children(ndkp1 := voc(rec,k + 1)))/4; nchk := #(rk := vect_of_children(ndk := voc(rec,k)))/4; -- get the two groups of children, and their lengths numleft := (nchk + nchkp1)/2; -- half the children; the left-hand will get this number of children if refcount(int_of_4(ndkp1)) > 1 then -- must copy pass("wo_share_copy"); stg := dr_load(ndkp1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r); incref(ndkp1,-1); ndkp1 := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k + 1,new_r); end if; if refcount(int_of_4(ndk)) > 1 then -- must copy pass("wo_share_copy2"); stg := dr_load(ndk); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r); incref(ndk,-1); ndk := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k,new_r); end if; if numleft > nchk then -- children will move left pass("wo_share_move_left"); num_mov := numleft - nchk; -- the number that will move left --print("move left: ",num_mov," ",hexify(dr_load(ndk)),"\n",hexify(dr_load(ndkp1))); -- we must subtract the cumulant of the last child moving left -- from the cumulants of all the right-hand children which do not move, -- and must add this to the cumulant of the k-th node. We must also -- add the cumulant of the last left-hand child of ndk to the cumulants of all -- the right-hand children which do move. ndk_cum := get_ch_cum(rec,k); -- get the cumulant of ndk right_cum := wo_get_cum(ndk); -- cumulant of the last child of ndk moved_cum := get_ch_cum(ndkp1,num_mov); -- cumulant of the last child of ndkp1 that moves set_ch_cum(rec,k,moved_cum + ndk_cum); -- add moved_cum to the cumulant of the k-th node set_ch_cum2(rec,k,get_ch_cum2(ndkp1,num_mov)); -- correct the second cumulant of the k-th node for j in [1..num_mov] loop set_ch_cum(ndkp1,j,right_cum + get_ch_cum(ndkp1,j)); end loop; for j in [num_mov + 1..nchkp1] loop set_ch_cum(ndkp1,j,get_ch_cum(ndkp1,j) - moved_cum); end loop; --print("move the children: ",rk," ",rkp1," ",num_mov); -- and now we must move the corresponding cums cumvp1 := vect_of_cums(ndkp1); -- second vector of cums --print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop; set_vect_of_cums(ndk,vect_of_cums(ndk) + cumvp1(1..num_mov * 5)); -- move the cums in set_vect_of_cums(ndkp1,cumvp1(num_mov * 5 + 1..)); -- move the cums out -- since the number of cums of ndkp1 is defined by its number of children, we need not edit that list cumvp1 := vect_of_cums2(ndkp1); -- second vector of second cums --print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop; set_vect_of_cums2(ndk,vect_of_cums2(ndk) + cumvp1(1..num_mov * 4)); -- move the cums in set_vect_of_cums2(ndkp1,cumvp1(num_mov * 4 + 1..)); -- move the cums out set_vect_of_children(ndk,rk + rkp1(1..4 * num_mov)); -- now actually move the children set_vect_of_children(ndkp1,rkp1(4 * num_mov + 1..)); else -- children will move right pass("wo_share_move_right"); -- we must subtract the cumulant of the last remaining child -- from that of each of the children moving right. The remaining cumulant of -- the last child moving right must then be added to the cumulants of -- all the original children of the (k + 1)-st node rem_left_cum := get_ch_cum(ndk,numleft); -- cumulant of the last remaining child of ndk for j in [numleft + 1..nchk] loop -- subtract this from the cum of all the nodes moving right set_ch_cum(ndk,j,get_ch_cum(ndk,j) - rem_left_cum); end loop; total_moved_cum := get_ch_cum(ndk,nchk); -- get the cumulant of the last node moving right for j in [1..nchkp1] loop -- add this to the cum of all the children of the (k + 1)-st node set_ch_cum(ndkp1,j,total_moved_cum + get_ch_cum(ndkp1,j)); end loop; set_ch_cum(rec,k,get_ch_cum(rec,k) - total_moved_cum); -- the cumulant of the last node moving right must be subtracted from the cumulant of the k-th node set_ch_cum2(rec,k,get_ch_cum2(ndk,numleft)); -- update the second cum of the moved node -- and now we must move the corresponding cums cumv := vect_of_cums(ndk); -- first vector of cums cumvp1 := vect_of_cums(ndkp1); -- second vector of cums set_vect_of_cums(ndkp1,cumv(5 * numleft + 1..5 * nchk) + vect_of_cums(ndkp1)); -- move the cums -- since the number of cums of ndk is defined by its number of children, we need not edit that list cumv := vect_of_cums2(ndk); -- first vector of cums cumvp1 := vect_of_cums2(ndkp1); -- second vector of cums set_vect_of_cums2(ndkp1,cumv(4 * numleft + 1..4 * nchk) + vect_of_cums2(ndkp1)); -- move the cums set_vect_of_children(ndk,rk(1..4 * numleft)); -- now actually move the children set_vect_of_children(voc(rec,k + 1),rk(4 * numleft + 1..) + rkp1); end if; end share_right; procedure join_with_left(rec,k); -- join to left sibling -- this routine must copy the left sibling if it has a refcount > 1; and must reduce the -- refcount of the right sibling if k = 1 then return false; end if; rk := vect_of_children(ndk := voc(rec,k)); nchkm1 := #(rkm1 := vect_of_children(ndkm1 := voc(rec,k - 1)))/4; --print("join_with_left: ",nchkm1," ",ic); if refcount(int_of_4(ndkm1)) > 1 then -- must copy pass("wo_jleft_copy"); stg := dr_load(ndkm1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r); incref(ndkm1,-1); ndkm1 := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k + 1,new_r); end if; pass("wo_jleft"); -- the cumulant of the left sibling simply becomes that of the right sibling set_ch_cum(rec,k - 1,get_ch_cum(rec,k)); set_ch_cum2(rec,k - 1,get_ch_cum2(rec,k)); last_left_cums := wo_get_cum(ndkm1); -- the cumulant of the last child of the left sibling -- must be added to that of every child of the right sibling for j in [1..#rk/4] loop set_ch_cum(ndk,j,last_left_cums + get_ch_cum(ndk,j)); end loop; set_vect_of_cums(ndkm1,vect_of_cums(ndkm1) + vect_of_cums(ndk)); -- the left sibling gets all the cumulants set_vect_of_cums2(ndkm1,vect_of_cums2(ndkm1) + vect_of_cums2(ndk)); -- the left sibling gets all the cumulants set_vect_of_children(ndkm1,rkm1 + rk); -- the left sibling gets all the children -- now delete the k-th cumulant the_cums := vect_of_cums(rec); -- get the cums of the current node the_cums(5 * k - 4..5 * k) := ""; set_vect_of_cums(rec,the_cums); -- delete the k-th cumulant the_cums := vect_of_cums2(rec); -- get the second cums of the current node the_cums(4 * k - 3..4 * k) := ""; set_vect_of_cums2(rec,the_cums); -- delete the k-th second cumulant the_ch := vect_of_children(rec); -- get the children of the current node the_ch(4 * k - 3..4 * k) := ""; set_vect_of_children(rec,the_ch); -- delete the k-th node set_vect_of_children(ndk,""); -- the children of ndk have been transfered incref(ndk,-1); -- drop the number of references to ndk; possibly erasing it --print("done join_with_left: ",hexify(dr_load(ndkm1))); return true; end join_with_left; procedure join_with_right(rec,k); -- join to right sibling -- this routine must copy the right sibling if it has a refcount > 1; and must reduce the -- refcount of the left sibling if k >= (nrv := num_childr(rec)) then return false; end if; nchk := #(rk := vect_of_children(ndk := voc(rec,k)))/4; rkp1 := vect_of_children(ndkp1 := voc(rec,k + 1)); if refcount(int_of_4(ndkp1)) > 1 then -- must copy pass("wo_jright_copy"); stg := dr_load(ndkp1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r); incref(ndkp1,-1); ndkp1 := new_r; -- substitute copy for original increfs(new_r,1); set_voc(rec,k + 1,new_r); end if; pass("wo_jright"); last_left_cums := wo_get_cum(ndk); -- the cumulant of the last child of the left sibling -- must be added to that of every child of the right sibling for j in [1..#rkp1/4] loop set_ch_cum(ndkp1,j,last_left_cums + get_ch_cum(ndkp1,j)); end loop; set_vect_of_cums(ndkp1,vect_of_cums(ndk) + vect_of_cums(ndkp1)); -- the right node gets all the cumulants set_vect_of_cums2(ndkp1,vect_of_cums2(ndk) + vect_of_cums2(ndkp1)); -- the right node gets all the cumulants set_vect_of_children(ndkp1,rk + rkp1); -- the right node gets all the children -- now delete the k-th cumulant the_cums := vect_of_cums(rec); -- get the cums of the current node the_cums(5 * k - 4..5 * k) := ""; set_vect_of_cums(rec,the_cums); -- delete the k-th cumulant the_cums := vect_of_cums2(rec); -- get the second cums of the current node the_cums(4 * k - 3..4 * k) := ""; set_vect_of_cums2(rec,the_cums); -- delete the k-th second cumulant --print("join_with_right: ",nchk," ",last_left_cums," ",hexify(the_cums)); the_ch := vect_of_children(rec); -- get the children of the current node the_ch(4 * k - 3..4 * k) := ""; set_vect_of_children(rec,the_ch); -- delete the k-th node --print("done join_with_right: ",hexify(dr_load(ndkp1))); set_vect_of_children(ndk,""); -- the children of ndk have been transfered incref(ndk,-1); -- drop the number of references to ndk; possibly erasing it return true; end join_with_right; procedure split_node(rec,k); -- split the k-th node into two -- we insert an empty node to the right of node k, and then share the children of node k with this empty node -- the empty node inserted stars wwith refcount = 1 voch := vect_of_children(rec); vocums := vect_of_cums(rec); vocums2 := vect_of_cums2(rec); vocums(5 * k + 1..5 * k) := vocums(5 * k - 4..5 * k); -- duplicate the prior cums vocums2(4 * k + 1..4 * k) := vocums2(4 * k - 3..4 * k); set_vect_of_cums(rec,vocums); -- and insert the revised cums into the record set_vect_of_cums2(rec,vocums2); -- duplicate the prior second cum and insert it into the record -- insert new node, whose number of children is automatically 0 u2 := dr_new_rec(); set_type(u2,if dr_is_compound(voc(rec,1)) then wdoccs_str_node_record else wdoccs_str_node_ncr end if); voch(4 * k + 1..4 * k) := u2; -- put in the new child set_vect_of_children(rec,voch); -- and insert the revised children into the record share_right(rec,k); -- share the children of node k with this empty node --print("split_node: ",str(wo_dump(rec))," ",num_childr(rec)," ",hexify(wo_get_cum2(rec))); end split_node; procedure wo_check_tree_structure(tree); -- recursive check of tree structure var level_now := 0,set_of_levels := { }; var smallest_branching := 10000; check_tree_structure_in(tree); -- call inner recursive workhorse if #set_of_levels > 1 then print("TREE STRUCTURE INCONSISTENCY, LEVELS ARE: ",set_of_levels," ",str(wo_dump(tree))); return false; end if; if smallest_branching < wo_low_lim then print("TREE STRUCTURE INCONSISTENCY, BRANCHING LEVEL DROPS TO: ",smallest_branching); return false; end if; return true; -- otherwise OK procedure check_tree_structure_in(tree); -- inner recursive workhorse level_now +:= 1; nc := num_childr(tree); if level_now > 1 then smallest_branching := smallest_branching min nc; end if; if dr_is_compound(tree) then for j in [1..nc] loop check_tree_structure_in(voc(tree,j)); end loop; else -- at leaf, so collect level of leaf set_of_levels with:= level_now; end if; level_now -:= 1; -- restore the prior level end check_tree_structure_in; end wo_check_tree_structure; end B_tree_for_wdocstring; program test; -- tests of B-tree operations for record occurence trees use setldb,byteutil, B_tree_for_wdocstring,db_records,string_utility_pak,disk_records_pak; var tree,small_tree; -- tree to test code_pts := wo_code_pts; -- code points to be traversed --points not passed: {"wo_in_copy", "wo_jleft_copy", "wo_share_copy", "wo_share_copy2", "wo_in_nend2", --"wo_nlast2", "wo_nc2", "wo_set_nlast2", "wo_set_last2"} small_tree := wo_make_from_tuple(breakex("10,20,30,2,4,6,11")); print("small_tree to search: ",str(wo_dump(small_tree))); print("first_past_to in small tree: ",string_and_ix(wo_first_past_to(small_tree,stg_of_4(4),4,6))); incref(small_tree,-1); -- demolish the tree print("memory check A1: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("10,20,30,2,4,6,11,21,31")); print("tree to search: ",str(wo_dump(tree))); print("first_past_to in tree: ",string_and_ix(wo_first_past_to(tree,stg_of_4(4),4,6))); print("first_past_to for key 5 in tree: ",string_and_ix(wo_first_past_to(tree,stg_of_4(5),4,6))); incref(tree,-1); -- demolish the tree print("memory check A2: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("" +/ [str(2000 + j) + ",": j in [1..60]] + "1100,1200,1300,1211,1411,1611,1111,1121,131,100,200,300,211,411,611,111,121,131,10,20,30,2,4,6,11,21,31")); print("big tree to search: ",str(wo_dump(tree))); print("first_past_to in big tree: ",string_and_ix(wo_first_past_to(tree,stg_of_4(4),82,84))); print("first_past_to for key 5 in big tree: ",string_and_ix(wo_first_past_to(tree,stg_of_4(5),82,84))); incref(tree,-1); -- demolish the tree print("memory check A3: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); str_len_cum_tests; -- tests of B-tree operations for record occurence trees report_points_passed(); -- report on code points not traversed procedure ocrec_occs(rec); -- convert occurence record num (4 bytes) to list of occurences return show_hex(wo_slice(rec,1,wo_length(rec))); end ocrec_occs; procedure str_len_cum_tests; -- tests of B-tree operations for record occurence trees small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,OM,leaf); -- insert leaf at end of small tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency: ",check_consistency(small_tree)," ",str(wo_dump(small_tree))); incref(small_tree,-1); -- demolish the tree print("memory check A4: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,1,leaf); -- insert leaf at start of small tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency: ",check_consistency(small_tree)," ",str(wo_dump(small_tree))); incref(small_tree,-1); -- demolish the tree print("memory check 0: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("30,31,32")); print("small_tree: ",str(wo_dump(small_tree))); print("components: ",collect_leaves(small_tree)," ",collect_cums(small_tree)," ",check_consistency(small_tree)); for j in [1..wo_get_cum(small_tree)] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(small_tree,j,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead if not check_consistency(small_tree) then print("small_tree components after change: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("small_tree change test passed"); for j in [1..wo_get_cum(small_tree)] loop wo_set_comp(small_tree,1,OM); -- delete leaf of small tree if not check_consistency(small_tree) then print("small_tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("small_tree deletion test passed"); incref(small_tree,-1); -- demolish the tree print("memory check 1: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("30,31,32")); for j in [1..wo_get_cum(small_tree)] loop wo_set_comp(small_tree,OM,OM); -- delete leaf of small tree if not check_consistency(small_tree) then print("small_tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("small_tree end deletion test passed"); incref(small_tree,-1); -- demolish the tree print("memory check 2: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print("tree: ",str(wo_dump(tree))," ",wo_get_cum(tree)," ",shexify(wo_get_cum2(tree))," ",check_consistency(tree)); print("components: ",collect_leaves(tree)," ",collect_cums(tree)); for j in [1..wo_get_cum(tree)] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(tree,j,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead if not check_consistency(tree) then print("tree components after change: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("change test passed"); print("components after changes: ",str(wo_dump(tree))," ",check_consistency(tree)); for j in [1..wo_get_cum(tree)] loop wo_set_comp(tree,1,OM); -- delete leaf of small tree if not check_consistency(tree) then print("tree components after deletion: ",j," ",collect_cums(tree)); stop; end if; end loop; print("deletion test passed"); incref(tree,-1); -- demolish the tree print("memory check 3: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); for j in [1..wo_get_cum(tree)] loop wo_set_comp(tree,OM,OM); -- delete leaf of small tree if not check_consistency(tree) then print("tree components after deletion: ",j," ",collect_cums(small_tree)); stop; end if; end loop; print("end deletion test passed"); incref(tree,-1); -- demolish the tree print("memory check 4: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); use_history := []; for reps in [1..the_last := 15] loop tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); xtree := tree; incref(tree,1); -- save and note a second copy for j in [1..7] loop wo_set_comp(tree,1,OM); end loop; -- deletions in one copy if reps = the_last then print("tree: ",str(wo_dump(tree))); print("xtree: ",str(wo_dump(xtree))); end if; incref(tree,-1); incref(xtree,-1); -- demolish both trees use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); end loop; print("use_history A:",str(use_history)); use_history := []; for reps in [1..the_last := 15] loop small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..34] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,OM,leaf); -- insert leaf into small tree incref(leaf,-1); -- the leaf is now dead --print("insertion: ",j," ",str(wo_dump(small_tree))); --if not check_consistency(small_tree) then stop; end if; end loop; if reps = the_last then print("small tree after insertions: ",str(wo_dump(small_tree))," ",wo_get_cum(small_tree)," ",check_consistency(small_tree)); end if; incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); end loop; print("use_history B:",str(use_history)); use_history := []; small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..3] loop wo_set_comp(small_tree,1,OM); -- delete leaf of small tree print("small tree: ",str(wo_dump(small_tree))); -- check the tree structure end loop; incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); for j in [1..10] loop small_tree := wo_make_from_tuple(breakex("1,2,3")); -- print("small tree: ",str(wo_dump(small_tree))); -- check the tree structure -- print(collect_leaves(small_tree)); print(collect_cums(small_tree)); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(666)); -- print("leaf check: ",string_of(leaf)); wo_set_comp(small_tree,1,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead -- print("small_tree check: ",str(wo_dump(small_tree))); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(999)); -- print("leaf recheck: ",string_of(leaf)); wo_set_comp(small_tree,1,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead -- print("small_tree recheck: ",str(wo_dump(small_tree))); incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); end loop; print("use_history C:",str(use_history)); small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..3] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(small_tree,j,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead end loop; print("small_tree after changes: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); print("check_consistency - small_tree after changes: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree use_history with:= str(if is_tuple(cm := check_memory()) then cm(1) else cm end if); print("use_history D: ",str(use_history)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print(str(wo_dump(tree))); print("tree length is: ",wo_get_cum(tree)); print("distinct tree components w cums. are: ",str(collect_cums(tree))); print("distinct tree components are: ",str(collect_leaves(tree))); for j in [1..10] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(tree,j,leaf); -- change leaf of tree incref(leaf,-1); -- the leaf is now dead end loop; print("tree after changes: ",collect_cums(tree)," ",wo_get_cum(tree)); print("check_consistency - tree after changes: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history E :",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print("tree before deletion: ",str(wo_dump(tree))," ",wo_get_cum(tree)); wo_set_comp(tree,1,OM); -- delete leaf of small tree print("tree after 1 deletion: ",str(wo_dump(tree)),"\n",str(collect_cums(tree))," ",wo_get_cum(tree)); print("check_consistency - tree after 1 deletion: ",check_consistency(tree)); for j in [1..10] loop wo_set_comp(tree,1,OM); -- delete leaf of tree end loop; print("check_consistency - tree after deletions: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history F: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print("tree before deletion: ",str(wo_dump(tree))," ",wo_get_cum(tree)); wo_set_comp(tree,OM,OM); -- delete leaf of small tree print("tree after 1 deletion: ",str(wo_dump(tree))," ",wo_get_cum(tree)); print("check_consistency - tree after 1 end deletion: ",check_consistency(tree)); for j in [1..10] loop wo_set_comp(tree,OM,OM); -- delete leaf of tree end loop; print("check_consistency - tree after end deletions: ",wo_get_cum(tree)," ",check_consistency(tree)); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(tree,1,leaf); -- insert leaf into tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency - insertions at start: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history G: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(tree,OM,leaf); -- insert leaf into tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency - insertions at end: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history H: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,1,leaf); -- insert leaf into small tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency - small_tree insertions at start: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history I: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,OM,leaf); -- insert leaf into small tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency - small_tree insertions at end: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history J: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); print(str(wo_dump(small_tree))); print("small_tree length is: ",wo_get_cum(small_tree)); print("small_tree components are: ",str([wo_slice(c1 := wo_comp(small_tree,j),1,wo_length(c1)): j in [1..wo_get_cum(small_tree)]])); print("small_tree components w cums. are: ",str([wo_slice(c1 := wo_comp(small_tree,j),1,wo_length(c1)): j in [1..wo_get_cum(small_tree)]])); print("distinct small_tree components w cums. are: ",collect_cums(small_tree)); print("distinct small_tree components are: ",collect_leaves(small_tree)); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(1)); wo_insert(small_tree,1,leaf); -- insert leaf at start of tree incref(leaf,-1); -- the leaf is now dead print("small_tree after insertion at start: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(2)); wo_insert(small_tree,OM,leaf); -- insert leaf at start of tree print("small_tree after insertion at end: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); incref(leaf,-1); -- the leaf is now dead leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(666)); print("leaf check: ",string_of(leaf)); wo_set_comp(small_tree,1,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead print("small_tree check: ",str(wo_dump(small_tree))); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(999)); print("leaf recheck: ",string_of(leaf)); wo_set_comp(small_tree,1,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead print("small_tree recheck: ",str(wo_dump(small_tree))); for j in [1..3] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(small_tree,j,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead end loop; print("small_tree after changes: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history K: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print(str(wo_dump(tree))); print("tree length is: ",wo_get_cum(tree)); print("distinct tree components w cums. are: ",collect_cums(tree)); print("distinct tree components are: ",collect_leaves(tree)); for j in [1..10] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(tree,j,leaf); -- change leaf of small tree incref(leaf,-1); -- the leaf is now dead end loop; print("tree after changes: ",collect_cums(tree)," ",wo_get_cum(tree)); for j in [1..10] loop wo_set_comp(tree,1,OM); -- delete first leaf of tree end loop; print("tree after deletion: ",10," ",collect_cums(tree)," ",wo_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history LL: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); for j in [1..10] loop wo_set_comp(tree,OM,OM); -- delete last leaf of tree end loop; print("tree after end deletion: ",10," ",collect_cums(tree)," ",wo_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history L: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("1,2,3")); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(1)); wo_insert(tree,1,leaf); -- insert leaf at start of tree incref(leaf,-1); -- the leaf is now dead print("tree after insertion at start: ",collect_cums(tree)," ",wo_get_cum(tree)); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(2)); wo_insert(tree,OM,leaf); -- insert leaf at end of tree incref(leaf,-1); -- the leaf is now dead print("tree after insertion at end: ",collect_cums(tree)," ",wo_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history M: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,1,leaf); -- insert leaf into small tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency: ",check_consistency(small_tree)); for j in [1..140] loop wo_set_comp(small_tree,1,OM); -- delete first leaf of tree end loop; print("small_tree after re_deletion at start: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history N: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); for j in [1..150] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,OM,leaf); -- insert leaf into small tree incref(leaf,-1); -- the leaf is now dead end loop; print("check_consistency: ",check_consistency(small_tree)); for j in [1..140] loop wo_set_comp(small_tree,OM,OM); -- delete last leaf of tree end loop; print("small_tree after re_deletion at end: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history O: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); small_tree := wo_make_from_tuple(breakex("1,2,3")); print("last element: ",string_of((stom := wo_comp_cum(small_tree,OM))(1))," ",stom(2)); print("check_consistency: ",check_consistency(small_tree)); print("small_tree cums: "); print("small_tree leaves with cumulants: ",str([string_and_cum(wo_comp_cum(small_tree,j)): j in [1..wo_get_cum(small_tree)]])); for j in [1..3] loop wo_set_comp(small_tree,1,OM); -- delete first leaf of small tree print("small_tree after deletion: ",j," ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); end loop; for j in [1..34] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(small_tree,OM,leaf); -- insert leaf into small tree incref(leaf,-1); -- the leaf is now dead end loop; print("small_tree after insertion at end: ",collect_cums(small_tree)," ",wo_get_cum(small_tree)); print("last element: ",string_of((stom := wo_comp_cum(small_tree,OM))(1))," ",stom(2)); print("check_consistency: ",check_consistency(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history P: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); for j in [1..12] loop wo_set_comp(tree,OM,OM); end loop; -- delete last leaf of tree print("tree after right deletions: ",collect_cums(tree)," ",wo_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history Q: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); for j in [1..12] loop wo_set_comp(tree,1,OM); end loop; -- delete last leaf of tree print("tree after left deletions: ",collect_cums(tree)," ",wo_get_cum(tree)); incref(tree,-1); -- demolish the tree print("use_history R: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); for j in [1..12] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_set_comp(tree,j,leaf); -- change j-th leaf of tree incref(leaf,-1); -- the leaf is now dead end loop; print("tree after changes: ",collect_cums(tree)," ",wo_get_cum(tree)); print(); print("check_consistency: ",check_consistency(tree)); incref(tree,-1); -- demolish the tree print("use_history S: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); print("big_tree cums"); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print("tree leaves with cumulants: ",str([string_and_cum(wo_comp_cum(tree,j)): j in [1..wo_get_cum(tree)]])); print("tree leaves (with reps): ",str([ocrec_occs(wo_comp(tree,j)): j in [1..wo_get_cum(tree)]])); incref(tree,-1); -- demolish the tree print("use_history S: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); print("*********** initial_tests ***********"); initial_tests; -- initial tests for B-trees with cumulators print("*********** deletion_tests ***********"); deletion_tests; -- test the deletion operations for B-trees with cumulators print("*********** insertion_tests ***********"); insertion_tests; -- test the insertion operations for B-trees with cumulators end str_len_cum_tests; procedure str_len(stg); return [wo_length(stg)]; end str_len; -- string length function, in a unit tuple procedure collect_leaves(t); -- collect the distinct leaves of a tree if num_childr(t) = 0 then return "[]"; end if; -- WORKAROUND FOR FOLLOWING LINE ***** return str([ocrec_occs(wo_comp(t,j)): j in [1..wo_get_cum(t)] | (j = 1 or wo_comp(t,j) /= wo_comp(t,j - 1))]); end collect_leaves; procedure collect_cums(t); -- collect the distinct leaves of a tree if num_childr(t) = 0 then return "[]"; end if; -- WORKAROUND FOR FOLLOWING LINE ***** return str([string_and_cum(wo_comp_cum(t,j)): j in [1..wo_get_cum(t)] | (j = 1 or wo_comp_cum(t,j) /= wo_comp_cum(t,j - 1))]); end collect_cums; procedure string_and_cum(rno_cum); -- convert a string node recno to the corresponding string [rec_no,the_cum,the_cum2] := rno_cum; return [ocrec_occs(rec_no),the_cum,shexify(the_cum2)]; end string_and_cum; procedure string_and_ix(rno_ix); -- convert a string node [db_key,index] pair to the corresponding string if rno_ix = OM then return OM; end if; [rec_no,the_cum] := rno_ix; return [int_of_4(rec_no),the_cum]; end string_and_ix; procedure string_of(rec); -- convert a string node rec to the corresponding string return wo_slice(rec,1,wo_length(rec)); -- get its string end string_of; procedure breakex(stg); -- convert comma-separated string of ints into tuple of 4-byte record ids t := []; for ints in breakup(stg,",") loop reads(ints,int); t with:= stg_of_4(int); end loop; return t; end breakex; procedure initial_tests; -- initial tests for B-trees with cumulators small_tree := wo_make_from_tuple(breakex("1,2,3")); print("small_tree - a.bb.ccc: ",str(wo_dump(small_tree))); print("small_tree leaf 1 and length: ",string_of(wo_comp(small_tree,1)), " " ,wo_get_cum(small_tree)); print("small_tree leaves (with reps.) and length: ",[ocrec_occs((wo_comp(small_tree,j))): j in [1..wo_get_cum(small_tree)]]); print("small_tree leaves (no reps.): ",collect_leaves(small_tree)); incref(small_tree,-1); -- demolish the tree print("use_history T: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); print("tree leaves: ",str(wo_dump(tree))); print("tree leaf 1 and length: ",ocrec_occs(wo_comp(tree,1)), " " ,wo_get_cum(tree)); print("12 tree leaves (with reps.): ",str([ocrec_occs(wo_comp(tree,j)): j in [1..12]])); print("tree leaves (no reps.): ",collect_leaves(tree)); wo_set_comp(tree,1,OM); -- delete the first element print("first element of compound tree deleted: ",wo_get_cum(tree)," ",str(collect_cums(tree))); print("check_consistency: ",check_consistency(tree)); leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(666)); wo_set_comp(tree,2,leaf); incref(leaf,-1); -- the leaf is now dead print("'666' inserted into element of compound tree: ",str(wo_dump(tree))); incref(tree,-1); -- demolish the tree print("use_history T: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); print("Iterative check; changes in successive positions"); all_ok := true; tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree for j in [1..10] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(666)); wo_set_comp(tree,j,leaf); incref(leaf,-1); -- the leaf is now dead if not check_consistency(tree) then print("IC1 - iteration ",j," error ",wo_dump(tree)); print(str(wo_dump(tree))); for mm in [1..wo_get_cum(tree)] loop print(mm,": ",wo_comp(tree,mm)); end loop; all_ok := false; stop; end if; --print("CC1 - iteration ",j," ",str(wo_dump(tree))); --print(str(wo_dump(tree))); end loop; if all_ok then print("Iterative change test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history U: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end initial_tests; procedure deletion_tests; -- test the deletion operations for B-trees with cumulators print("Iterative check; deletions in successive positions"); all_ok := true; tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree for j in [1..10] loop wo_set_comp(tree,j,OM); if not check_consistency(tree) then print("IC1 - iteration ",j," error ",wo_dump(tree)); print(str(wo_dump(tree))); for mm in [1..wo_get_cum(tree)] loop print(mm,": ",tree(mm)); end loop; all_ok := false; stop; end if; incref(tree,-1); -- demolish the tree --print("IC1 - iteration ",j," ",str(wo_dump(tree))); --print(str(wo_dump(tree))); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree end loop; if all_ok then print("First deletion test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history V: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree all_ok := true; print("Iterative check; deletions at start"); for j in [1..10] loop wo_set_comp(tree,1,OM); -- delete the first tree element if not check_consistency(tree) then print("IC2 - iteration ",j," error ",wo_dump(tree)); print(str(wo_dump(tree))); all_ok := false; stop; end if; -- print("IC2 - iteration ",j," ",str(wo_dump(tree))); end loop; if all_ok then print("Second deletion test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history W: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree all_ok := true; print("Iterative check; deletions at end"); for j in [1..10] loop wo_set_comp(tree,wo_get_cum(tree),OM); -- delete the last tree element if not check_consistency(tree) then print("IC3 - iteration ",j," error ",wo_dump(tree)); print("#tree is: ", wo_get_cum(tree)); print(str(wo_dump(tree))); all_ok := false; stop; end if; -- print("IC3 - iteration ",j," ",str(wo_dump(tree))); end loop; if all_ok then print("Third deletion test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history X: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end deletion_tests; procedure insertion_tests; -- test the insertion operations for B-trees with cumulators all_ok := true; print("Iterative check; insertions after end, starting with null tree"); tree := wo_make_from_tuple([stg_of_4(99)]); wo_set_comp(tree,1,OM); for j in [1..10] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(tree,wo_get_cum(tree) + 1,leaf); -- make the insertion incref(leaf,-1); -- the leaf is now dead --print("After CUM_INS0 - iteration ",j," ",str(wo_dump(tree))); --print(str(wo_dump(tree))); if (not check_consistency(tree)) then print("CUM_INS0 - iteration ",j," error ",lo," ",le); print(str(wo_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion starting with null tree test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history XX: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree all_ok := true; print("Iterative check; insertions after end"); all_ok := true; for j in [0..9] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(tree,wo_get_cum(tree) + 1,leaf); -- make the insertion incref(leaf,-1); -- the leaf is now dead -- print("After CUM_INS1 - iteration ",j," ",str(wo_dump(tree))); if not check_consistency(tree) then print("CUM_INS1 - iteration ",j," error ",wo_dump(tree)); print(str(wo_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion-at-end test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history Y: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree all_ok := true; print("Iterative check; insertions at start"); all_ok := true; for j in [0..9] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(tree,1,leaf); -- make the insertion incref(leaf,-1); -- the leaf is now dead --print("After CUM_INS2 - iteration ",j," ",str(wo_dump(tree))); --print(str(wo_dump(tree))); if not check_consistency(tree) then print("CUM_INS2 - iteration ",j," error ",wo_dump(tree)); print(str(wo_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion-at-start test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history Z: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); tree := wo_make_from_tuple(breakex("11,12,13,14,15,16,17,18,19,20,21,22")); -- re-initialize the tree all_ok := true; print("Iterative check; insertions after second element"); for j in [0..9] loop leaf := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(leaf,1,0,stg_of_4(j)); wo_insert(tree,3,leaf); -- make the insertion incref(leaf,-1); -- the leaf is now dead --print("After CUM_INS3 - iteration ",j," ",str(wo_dump(tree))); --print(str(wo_dump(tree))); if not check_consistency(tree) then print("CUM_INS3 - iteration ",j," error ",wo_dump(tree)); print(str(wo_dump(tree))); all_ok := false; stop; end if; end loop; if all_ok then print("Insertion-after-second test passed successfully."); end if; incref(tree,-1); -- demolish the tree print("use_history A1: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end insertion_tests; procedure check_consistency(the_tree); -- consistency check for big_string trees if not wo_check_tree_structure(the_tree) then return false; end if; -- check that vector of leaves obtained directly is also obtained by collecting individual components if (lo := str(leaves_only(wo_dump(the_tree)))) /= (le := collect_leaves(the_tree)) then print(lo," ",str(le)," ",str(wo_dump(the_tree))); print("FAILURE: leaf discrepancy"); return false; end if; if num_childr(the_tree) = 0 then return true; end if; -- no cumulant check for empty tree -- check that cumulants advance properly if exists n in [2..wo_get_cum(the_tree)] | (((tn := wo_comp_cum(the_tree,n)) /= (tnm12 := wo_comp_cum(the_tree,n - 1))) and (tn(2) /= (tnm12(2) + (#string_of(tn(1)))/4))) then print("FAILURE: first cumulant bad at position ",n," ",string_and_cum(tn)," ",string_and_cum(tnm12)," ",string_of(tn(1))); return false; end if; if exists n in [1..wo_get_cum(the_tree)] | (((tn := wo_comp_cum(the_tree,n)) /= (tnm12 := wo_comp_cum(the_tree,n - 1))) and (tn(3) /= string_of(tn(1)))) then print("FAILURE: first cumulant bad at position ",n," ",string_and_cum(tn)," ",string_and_cum(tnm12)," ",string_of(tn(1))); return false; end if; -- check that the first cumulant is good if (cc1 := wo_comp_cum(the_tree,1))(2) /= (#string_of(cc1(1)))/4 then print("FAILURE: first node cumulant in position 1 (",cc1(2),") is not leaf length"); return false; end if; return true; end check_consistency; procedure leaves_only(tup); return [c: c in tup | ("(" notin c) and (")" notin c)]; end leaves_only; end test;