DB_bs_wdoc.stl
"Specialized big string for storing word occurence lists."
--file Big_stg_for_wdoc.stl package big_stg_for_wdoc_pak; -- specialized big string for storing word occurence lists const bswo_code_pts := {"set_copy", "set_simple", "not_dangerous", "share_left", "join_left", "share_right", "join_right", "use_leaf", "must_copy", "simple_in", "not_split", "split", "nosplit_in", "split_in"}; -- code points to be traversed procedure bswo_from_stg(stg); -- create a big string from a string procedure stg_from_bswostg(rec); -- make a string from a bswostg procedure bswo_comp(rec,i); -- get i-th component procedure bswo_set_comp(rw rec,i,stg); -- component assignment operation procedure bswo_insert(rw rec,i,stg); -- component insertion operation; if i = OM then insertion at end -- (PUBLIC FOR DEBUGGING ONLY) procedure wo_voc(rec,j); -- j'th member of vector of children for B_tree_for_bigstring records procedure bswo_length(rec); -- total length of a bswostg end big_stg_for_wdoc_pak; package body big_stg_for_wdoc_pak; -- specialized big string for storing word occurence lists use setldb,byteutil,disk_records_pak,db_records,B_tree_for_wdocstring,string_utility_pak; procedure bswo_from_stg(stg); -- make a big_string from a string -- we first chop up the string into a tuple of string records if #(pieces_tup := chop_up(stg)) = 1 then return make_record(pieces_tup(1)); end if; -- see comment on the 'chop_up' routine, below. If the whole string fits in -- just one string record, we simply return that record. --print("pieces_tup: ",[hexify(piece): piece in pieces_tup]); return wo_make_from_tuple(pieces_tup); -- otherwise convert the list of sections returned into a tree end bswo_from_stg; procedure stg_from_bswostg(rec); -- make a string from a big_string if dr_load(rec)(type_byte) = wdoccs_string_record then return wo_slice(rec,1,wo_length(rec)); end if; -- get string from record if not dr_is_compound(rec) then -- concatenate strings from records return "" +/ [wo_slice(ch,1,wo_length(ch)): j in [1..num_childr(rec)] | (ch := wo_voc(rec,j)) /= OM]; end if; return "" +/ [stg_from_bswostg(wo_voc(rec,j)): j in [1..num_childr(rec)]]; -- else proceed recursively end stg_from_bswostg; procedure chop_up(stg); -- chop the concatenation of three strings into a tuple of string records -- if all three pieces fit into one, two or three sections, return these. Otherwise join as much as possible of the -- middle into two pieces, and then the reminder of the middle into approximately equal-sized pieces. -- first and lat are assumed to be no more than one record size each if (ns := #stg)/4 <= wos_hi_lim then return [stg]; end if; -- put all into one piece psm1 := 4 * wos_hi_lim - 1; if (extra_part := ns mod (4 * wos_hi_lim)) = 0 then -- fits into a list of full pieces return [stg(j..j + psm1): j in [1,4 * wos_hi_lim + 1..ns - psm1]]; else -- there is a bit left over start_last_2 := ns - psm1 - extra_part; -- starting character of last 2 sections mid_last_2 := start_last_2 + 4 * ((4 * wos_hi_lim + extra_part)/8) - 1; -- middle character of last 2 sections -- use list of all but 1 full piece, plus two smaller pieces. return [stg(j..j + psm1): j in [1,4 * wos_hi_lim + 1..ns - 2 * psm1 - extra_part]] + [stg(start_last_2..mid_last_2),stg(mid_last_2 + 1..)]; end if; end chop_up; procedure bswo_comp(rec,i); -- component extraction operation. the 'node' can be either -- the top of a tree, or can be a simple wdoccs_string_record --print("bswo_comp: ",i); if i < 1 then abort("Illegal parameters in record id extraction operation "+ str(i)); end if; if dr_load(rec)(type_byte) = wdoccs_string_record then -- simple string record case if i > (sl := wo_length(rec)) then -- get string from record abort("Illegal item index in record id extraction operation: " + str(i) + ", " + str(sl)); end if; --print("bswo_comp bottom: ",i," ",wo_slice(rec,i,i)); return wo_slice(rec,i,i); end if; -- otherwise we have the tree case -- get the child containing the first character past i if not (exists chixi in [1..nc := num_childr(rec)] | wo_get_ch_cum(rec,chixi) >= i) then abort("Illegal component index in string extraction operation: " + str(i) + ", length is only " + str(wo_get_ch_cum(rec,nc))); end if; prev_cumi := if chixi = 1 then 0 else wo_get_ch_cum(rec,chixi - 1) end if; -- the cumulant previous to i return bswo_comp(wo_voc(rec,chixi),i - prev_cumi); -- proceed recursively end bswo_comp; procedure wo_voc(rec,j); -- j'th member of vector of children for B_tree_for_wdocstring records stg := dr_load(rec); -- load this string cjstrt := (j - 1) * 4 + wo_ch_start; return stg(cjstrt..cjstrt + 3); -- return child rec end wo_voc; procedure bswo_length(rec); -- total length of a bswostg return if dr_load(rec)(type_byte) = wdoccs_string_record then wo_length(rec) else wo_get_cum(rec) end if; end bswo_length; procedure wo_get_ch_cum(rec,j); -- get the cum value for the j-th child of this node return int_of_5(dr_load(rec)(wo_cum_start + (j - 1) * 5..wo_cum_start + j * 5 - 1)); end wo_get_ch_cum; procedure make_record(stg); -- creates one record from string rec := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(rec,1,0,stg); return rec; end make_record; procedure pass_pass(stg); print(stg); end pass_pass; -- debug print procedure bswo_set_comp(rw rec,i,stg); -- component assignment operation; if stg = OM then have deletion --print("bswo_set_comp: ",i," ",stg," ",abs(dr_load(rec)(type_byte))," ",type(stg)); -- since rec will change, ensure that it has just one copy. -- This will up the refcounts of its children if necessary if refcount(int_of_4(rec)) > 1 then -- must copy pass("set_copy"); stgg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stgg); dr_dirtify(new_r); increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original end if; if (contents := dr_load(rec))(type_byte) = wdoccs_string_record then -- simple string record case pass("set_simple"); if i < 1 or i > wo_length(rec) then abort("Illegal parameter in string assignment operation" + str(i)); end if; wo_set_slice(rec,i,i,stg?""); -- if stg = OM then have deletion return; end if; -- otherwise we deal with the B-tree case if i > (len_this := wo_get_cum(rec)) + 1 then abort("Out of range parameter in string assignment operation " + str(i) + " Length is only " + str(len_this)); end if; [leafi,int_cum,-] := wo_comp_cum(rec,i); -- get the leaf addressed prev_cum := int_cum - wo_length(leafi); -- the integer cumulant value just prior to the leaf old_leafi := leafi; -- save the old version of the leaf, for refcount manipulation incref(old_leafi,1); -- delay eliminating reference to this leaf --print("type(stg) before: ",type(stg)); -- APPARENT BUG ********** stg_not_om := stg /= OM; -- workaround for APPARENT BUG ********** wo_set_slice(leafi,i - prev_cum,i - prev_cum,stg?""); -- replace slice; leafi is copied, its old version will lose 1 ref --print("type(stg): ",type(stg)); -- APPARENT BUG ********** if (lni := wo_length(leafi)) > wos_low_lim or stg_not_om then -- not a dangerous deletion, so we can finish easily --print("stg: ",stg," ",lni," ",wos_low_lim," ",type(stg)); pass("not_dangerous"); wo_set_comp(rec,i,leafi); -- reset the leaf of rec, since it might have changed incref(leafi,-1); -- the variable leafi is now dead (compensate for extra ref added by set_comp) return; -- done with this case end if; -- otherwise we must either share or join with right or left neighbor leaf if prev_cum > 1 then -- use left neighbor leaf naybl := wo_length(nayb_leaf := wo_comp(rec,prev_cum)); nayb_stg := wo_slice(nayb_leaf,1,naybl); -- get the two strings li_stg := wo_slice(leafi,1,lni); old_nayb_leaf := nayb_leaf; -- save the old versions of the leaves, for refcount manipulation old_leafi := leafi; if naybl > wos_low_lim then -- share with left pass("share_left"); half := (lni + naybl)/2; -- part of the string that will remain with left neighbor incref(old_nayb_leaf,1); -- delay eliminating reference to the leaf wo_set_slice(nayb_leaf,1,naybl,nayb_stg(1.. 4 * half)); -- nayb_leaf has changed, old_nayb_leaf has lost one ref, which nayb_leaf carries wo_set_slice(leafi,1,lni,nayb_stg(4 * half + 1..) + li_stg); -- leafi has changed, old_leafi has lost one ref, which leafi carries wo_set_comp(rec,i,leafi); -- re-insert leafi, which has changed incref(leafi,-1); -- the variable leafi is now dead (compensate for extra ref added by set_comp) wo_set_comp(rec,prev_cum,nayb_leaf); -- re-insert nayb_leaf, which has changed incref(nayb_leaf,-1); -- compensate for extra ref added by set_comp else -- join pass("join_left"); incref(old_nayb_leaf,1); -- delay eliminating reference to the leaf wo_set_slice(nayb_leaf,1,naybl,nayb_stg + li_stg); -- nayb_leaf has changed from old_nayb_leaf --print("wo_set_slice: ",i," ",num_childr(rec)," ",hexify(nayb_stg + li_stg)," ",hexify(nayb_stg)," ",hexify(li_stg)); wo_set_comp(rec,i,OM); -- delete leaf i, which is no longer used; it loses one ref incref(old_leafi,-1); -- it loses one ref wo_set_comp(rec,prev_cum,nayb_leaf); -- insert the changed neighbor leaf incref(nayb_leaf,-1); -- compensate for extra ref added by set_comp end if; elseif i < len_this then -- use right neighbor leaf naybl := wo_length(nayb_leaf := wo_comp(rec,int_cum + 1)); nayb_stg := wo_slice(nayb_leaf,1,naybl); -- get the two strings li_stg := wo_slice(leafi,1,lni); old_nayb_leaf := nayb_leaf; -- save the old versions of the leaves, for refcount manipulation old_leafi := leafi; if naybl > wos_low_lim then -- share with right pass("share_right"); half := (lni + naybl)/2; -- part of the string that will remain with left neighbor incref(old_nayb_leaf,1); -- delay eliminating reference to the leaf wo_set_slice(nayb_leaf,1,naybl,nayb_stg(4 * (half - lni) + 1..)); -- if nayb_leaf has changed, old_nayb_leaf has lost one ref, which nayb_leaf carries wo_set_slice(leafi,1,lni,li_stg + nayb_stg(1..4 * (half - lni))); -- leafi has changed, old_leafi has lost one ref, which leafi carries wo_set_comp(rec,i,leafi); -- re-insert leaf i incref(leafi,-1); -- the variable leafi is now dead (compensate for extra ref added by set_comp) wo_set_comp(rec,int_cum + 1 + half - lni,nayb_leaf); -- (note change in cumulant index of this leaf) -- re-insert the left neighbor leaf, which will have changed incref(nayb_leaf,-1); -- to compensate for the additional ref which nayb_leaf has gained else -- join pass("join_right"); incref(old_nayb_leaf,1); -- note this additional reference to the leaf wo_set_slice(nayb_leaf,1,naybl,li_stg + nayb_stg); -- nayb_leaf has changed, and old_nayb_leaf has lost one ref, which nayb_leaf carries wo_set_comp(rec,i,OM); -- delete leaf i, which is no longer used; it loses one ref incref(old_leafi,-1); -- it loses one ref wo_set_comp(rec,i,nayb_leaf); -- (note change in cumulant index of this leaf) -- re-insert the left neighbor leaf, which will have changed incref(nayb_leaf,-1); -- to compensate for the additional ref which nayb_leaf has gained end if; else -- no neighbor; replace tree by leaf pass("use_leaf"); rec := leafi; inc_refs(rec,-1); -- the tree, which was copied, can be erased end if; --print("deletion: "); end bswo_set_comp; procedure bswo_insert(rw rec,i,stg); -- component insertion operation; if i = OM then insertion at end if refcount(int_of_4(rec)) > 1 then -- must copy pass("must_copy"); stgg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stgg); dr_dirtify(new_r); increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original end if; if dr_load(rec)(type_byte) = wdoccs_string_record then -- simple string record case pass("simple_in"); rl := wo_length(rec); i ?:= (rl + 1); -- if i = OM then insertion at end if i < 1 or i > rl + 1 then abort("Illegal parameter in string insertion operation" + str(i)); end if; if rl < wos_hi_lim then -- need not split pass("not_split"); wo_set_slice(rec,i,i - 1,stg); -- make the insertion return; -- done with this case end if; -- otherwise we must split this leaf into a tree pass("split"); contents := wo_slice(rec,1,wo_length(rec)); -- get the string contents of the record contents(4 * i - 3..4 * i - 4) := stg; -- make the insertion into aux string half := (rl + 1)/2; -- number of occurences to remain in leaf new_stg := dr_load(newl := dr_new_rec()); new_stg(type_byte) := wdoccs_string_record; -- create a new leaf dr_setrecbuf(newl,new_stg); dr_dirtify(newl); wo_set_slice(newl,1,0,contents(1..4 * half)); -- leaf gets left half of the children old_rec := rec; incref(old_rec,1); -- delay change in this record wo_set_slice(rec,1,rl,contents(4 * half + 1..)); -- set slice of wd occurence record (keeps right half) incref(old_rec,-1); -- end delay of change in this record stgg := dr_load(newt := dr_new_rec()); stgg(type_byte) := wdoccs_str_node_ncr; -- create a new tree dr_setrecbuf(newt,stgg); dr_dirtify(newt); wo_insert(newt,1,rec); wo_insert(newt,1,newl); -- insert the two components (at start) incref(rec,-1); incref(newl,-1); -- compensate for the extra refs introduced by the insertions rec := newt; -- substitute the tree for the original leaf return; -- done with this case end if; -- otherwise we deal with an insertion into a B_tree len_this := wo_get_cum(rec); orig_i := i; -- save, since may need to inset at end i ?:= (len_this + 1); -- if i = OM then insertion at end if i > len_this + 1 then abort("Out of range parameter in insertion operation " + str(i) + " Length is only " + str(len_this)); end if; [leafi,int_cum,-] := bb:=wo_comp_cum(rec,i min len_this); -- this returns the triple [leaf,int_cum_of_leaf,second_cum_of_leaf] prev_cum := int_cum - wo_length(leafi); -- the integer cumulant value just prior to the leaf old_leafi := leafi; -- save the old version of the leaf, for refcount manipulation if (rl := wo_length(leafi)) < wos_hi_lim then -- get the leaf addressed, and its length pass("nosplit_in"); incref(old_leafi,1); -- prevent old_leafi from losing its last ref too early wo_set_slice(leafi,i - prev_cum,i - prev_cum - 1,stg); -- make an insertion into the slice; if leafi is copied, its old version will lose 1 ref wo_set_comp(rec,orig_i,leafi); -- reset the leaf of rec, since it might have changed incref(leafi,-1); -- the variable leafi is now dead (compensate for extra ref added by set_comp) return; -- done with this case end if; -- otherwise we must split pass("split_in"); contents := wo_slice(leafi,1,wo_length(leafi)); -- get the present contents of the leaf imp := i - prev_cum; -- get the relative location of the insertion point in its string contents(4 * imp - 3..4 * imp - 4) := stg; -- make the insertion into aux string half := (rl + 1)/2; -- number of occurences to remain in leaf new_stg := dr_load(newl := dr_new_rec()); new_stg(type_byte) := wdoccs_string_record; -- create a new leaf dr_setrecbuf(newl,new_stg); dr_dirtify(newl); wo_set_slice(newl,1,0,contents(1..4 * half)); -- new leaf gets left half of the children incref(old_leafi,1); -- pervent old_leafi from losing its last ref too early wo_set_slice(leafi,1,rl,contents(4 * half + 1..)); -- set slice of leafi to remaining occs -- if leafi is copied, its old version will lose 1 ref wo_set_comp(rec,if orig_i = OM then i - 1 else i end if,leafi); -- reset the leaf of rec, since it might have changed incref(leafi,-1); -- the variable leafi is now dead (compensate for extra ref added by set_comp) wo_insert(rec,prev_cum + 1,newl); -- insert the additional leaf just before the existing one incref(newl,-1); -- compensate for the extra ref introduced by the insertion --print("tree_dump after insert: ",str(wo_dump(rec))); end bswo_insert; end big_stg_for_wdoc_pak; program test; -- test program for big_string package and its associated class use setldb,byteutil,big_stg_for_wdoc_pak; use disk_records_pak,db_records,B_tree_for_wdocstring,string_utility_pak; var orig := "1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16."; -- string for tests var orig2 := "21.22.23.24.25.26.27.28.29.30.31.32.33.34."; -- variant string for tests var orig3:= "21.22.23.24.25.26.27."; -- short string for tests var orig4:= "21.22.23.24.25.26."; -- shorter string for tests database_file_name := "bs_test_file"; -- set the name of the file to be used code_pts := bswo_code_pts; -- code points to be traversed package_tests; -- test the underlying package procedure check_mem(caption,rec); -- memory check utility incref(rec,-1); print(caption," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end check_mem; procedure breakex(stg); -- rearrange period-delimited string as string of 4-byte sections return "" +/ [tostr_of_4(piece): piece in breakup(stg,".") | piece /= ""]; procedure tostr_of_4(stg); reads(stg,ival); return stg_of_4(ival); end tostr_of_4; end breakex; procedure comp_piece(stg,n); -- return section of string representing n-th component return stg(4 * (n - 1) + 1..4 * n); end comp_piece; procedure set_comp_piece(rw stg,n,a_comp); -- assign section of string representing n-th component stg(4 * (n - 1) + 1..4 * n) := a_comp; end set_comp_piece; procedure insert_comp_piece(rw stg,n,a_comp); -- insert section of string representing n-th component stg(4 * (n - 1) + 1..4 * (n - 1)) := a_comp; end insert_comp_piece; procedure package_tests; -- tests of the underlying package no_error := true; rc := bswo_from_stg(s1 := breakex(orig4)); for j in [1..60] loop bswo_insert(rc,1,4 * char(j)); insert_comp_piece(s1,1,4 * char(j)); s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc)); --wo_dump(rc); print(hexify(s1)); print(hexify(s2)); end loop; print("insertion test; 60 insertions at start: ",no_error); check_mem("memcheck 10a",rc); no_error := true; rc_orig := rc := bswo_from_stg(s1 := breakex(orig4)); incref(rc_orig,1); for j in [1..60] loop bswo_insert(rc,OM,4 * char(j)); insert_comp_piece(s1,#s1/4 + 1,4 * char(j)); s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc)); --wo_dump(rc); print(hexify(s1)); print(hexify(s2)); end loop; print("insertion test; 60 insertions at end: ",no_error); incref(rc_orig,-1); check_mem("memcheck 10a",rc); rc := bswo_from_stg(s1 := breakex(orig)); bswo_insert(rc,1,4 * "\x66"); insert_comp_piece(s1,1,4 * "\x66"); s2 := stg_from_bswostg(rc); print("long insertion test: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 10b",rc); rc := bswo_from_stg(s1 := breakex(5 * orig)); bswo_insert(rc,1,4 * "\x66"); insert_comp_piece(s1,1,4 * "\x66"); s2 := stg_from_bswostg(rc); print("longer insertion test: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 10c",rc); rc := bswo_from_stg(s1 := breakex(orig4)); bswo_set_comp(rc,3,4 * "\x66"); set_comp_piece(s1,3,4 * "\x66"); -- test component assignment operation, short form s2 := stg_from_bswostg(rc); print("short slice assignment: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 11a",rc); rc_orig := rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); print("prelim check: ",check_leaves(rc)); incref(rc_orig,1); bswo_set_comp(rc,3,4 * "\x66"); set_comp_piece(s1,3,4 * "\x66"); -- test component assignment operation, long form s2 := stg_from_bswostg(rc); print("long slice assignment: ",s1 = s2 and check_leaves(rc)); incref(rc_orig,-1); check_mem("memcheck 11b",rc); rc := bswo_from_stg(s1 := breakex(orig4)); bswo_set_comp(rc,3,OM); set_comp_piece(s1,3,""); -- test component assignment operation, short form s2 := stg_from_bswostg(rc); print("short slice deletion: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 11c",rc); rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); print("prelim check: ",check_leaves(rc)); bswo_set_comp(rc,3,OM); set_comp_piece(s1,3,""); -- test component assignment operation, long form s2 := stg_from_bswostg(rc); print("long slice deletion: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 11d",rc); no_error := true; rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); print("before deletions: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); for j in [1..60] loop -- do 60 deletions bswo_set_comp(rc,1,OM); set_comp_piece(s1,1,""); -- test component assignment operation, long form s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc)); --print("deletion cycle: ",j," ",s1 = s2 and check_leaves(rc)," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if)); end loop; print("long slice, 60 deletions: ",no_error); check_mem("memcheck 11d",rc); no_error := true; rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); for j in [1..60] loop -- do 60 deletions from right end old_rc := rc; bswo_set_comp(rc,bswo_length(rc),OM); set_comp_piece(s1,#s1/4,""); -- test component assignment operation, long form if old_rc /= rc then incref(old_rc,-1); end if; -- throw away the old tree if copied s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc)); --print("end deletion: ",j," ",s1 = s2 and check_leaves(rc)," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if," ",hexify(rc)); end loop; print("long slice, 60 end deletions: ",no_error); check_mem("memcheck 11e",rc); report_points_passed(); -- report on code points not traversed stop; s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(orig3))); print("short reconstruction: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 1",rc); s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(orig))); print("reconstruction: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 1a",rc); s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(orig2))); print("reconstruction2: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 1b",rc); s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(5 * orig2 + orig))); print("long reconstruction: ",s1 = s2 and check_leaves(rc)); --print(hexify(s1)); check_mem("memcheck 2",rc); s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(15 * orig2 + orig))); print("longer reconstruction: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 3",rc); s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(15 * orig2))); print("longer reconstruction 2: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 4",rc); s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(20 * orig2 + orig))); print("longer reconstruction 3: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 5",rc); no_error := true; for j in [1..10] loop s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(orig)),j); no_error and:= (comp_piece(s1,j) = s2); if j < 10 then incref(rc,-1); end if; end loop; print("comps of short reconstruction: ",no_error); check_mem("memcheck 6",rc); no_error := true; for j in [1..10] loop s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(orig + orig3)),j); no_error and:= (comp_piece(s1,j) = s2); if j < 10 then incref(rc,-1); end if; end loop; print("comps of short reconstruction: ",no_error); check_mem("memcheck 7",rc); no_error := true; for j in [1..50] loop s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(5 * orig2 + orig3)),j); no_error and:= (comp_piece(s1,j) = s2); if j < 50 then incref(rc,-1); end if; end loop; print("comps of short reconstruction: ",no_error); check_mem("memcheck 8",rc); no_error := true; for j in [1..50] loop s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(5 * orig)),j); no_error and:= (comp_piece(s1,j) = s2); if j < 50 then incref(rc,-1); end if; end loop; print("comps of short reconstruction: ",no_error); check_mem("memcheck 9",rc); no_error := true; for j in [1..50] loop s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(2 * orig + orig3)),j); no_error and:= (comp_piece(s1,j) = s2); if j < 50 then incref(rc,-1); end if; end loop; print("comps of short reconstruction: ",no_error); check_mem("memcheck 10",rc); rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); bswo_set_comp(rc,3,4 * "\x66"); set_comp_piece(s1,3,4 * "\x66"); -- test component assignment operation, long form s2 := stg_from_bswostg(rc); print("long slice shortening insert: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 11",rc); rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); bswo_set_comp(rc,3,OM); set_comp_piece(s1,3,""); -- test component deletion operation, long form s2 := stg_from_bswostg(rc); print("long slice shortening insert: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 21",rc); rc := bswo_from_stg(s1 := breakex(orig + orig3)); bswo_set_comp(rc,3,4 * "\x66"); s1(3..2) := "XX"; -- test slice insertion operation, long form s2 := stg_from_bswostg(rc); print("long slice insert: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 22",rc); rc := bswo_from_stg(s1 := orig + orig3); bswo_set_comp(rc,3,4 * "\x66");s1(3..2) := 3 * orig2; -- test slice insertion operation, long form s2 := stg_from_bswostg(rc); print("long slice long insert: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 23",rc); rc := bswo_from_stg(s1 := orig); bswo_set_comp(rc,1,4 * "\x66"); s1(1..3) := "XX"; -- test slice assignment operation, short form s2 := stg_from_bswostg(rc); print("short slice shrink-a-bit: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 24",rc); rc := bswo_from_stg(s1 := orig); bswo_set_comp(rc,3,OM); s1(1..3) := ""; -- test slice assignment operation, short form s2 := stg_from_bswostg(rc); print("short slice shrinkage: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 25",rc); rc := bswo_from_stg(s1 := orig); bswo_set_comp(rc,3,"YYYYY"); s1(1..3) := "YYYYY"; -- test slice assignment operation, short form s2 := stg_from_bswostg(rc); print("short slice expand: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 26",rc); rc := bswo_from_stg(s1 := orig); bswo_insert(rc,1,"ZZZZZ"); s1(1..0) := "ZZZZZ"; -- test slice assignment operation, short form s2 := stg_from_bswostg(rc); print("short slice insert: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 27",rc); rc := bswo_from_stg(s1 := orig + orig3); rl := bs_length(rc); bswo_insert(rc,OM,"XX"); s1(rl + 1..rl) := "XX"; -- test slice append operation, short form s2 := stg_from_bswostg(rc); print("long slice append: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 28",rc); rc := bswo_from_stg(s1 := 2 * orig + orig3); rl := bs_length(rc); bswo_insert(rc,OM,"WWWWWWW"); s1(rl + 1..rl) := "WWWWWWW"; -- test slice append operation, long form s2 := stg_from_bswostg(rc); print("long slice append 2: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 29",rc); rc := bswo_from_stg(s1 := 5 * orig2 + orig3); rl := bs_length(rc); bswo_insert(rc,OM,"YYYYY"); s1(rl + 1..rl) := "YYYYY"; -- test slice append operation, longer form s2 := stg_from_bswostg(rc); print("long slice append 3: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 30",rc); rc := bswo_from_stg(s1 := 5 * orig2 + orig3); rl := bs_length(rc); bswo_insert(rc,OM,3 * orig); s1(rl + 1..rl) := 3 * orig; -- test slice append operation, longer form s2 := stg_from_bswostg(rc); print("long slice long append: ",s1 = s2 and check_leaves(rc)); check_mem("memcheck 30A",rc); end package_tests; procedure check_leaves(rec); -- checks that leaves have required maximum and minimum lengths if (contents := dr_load(rec))(type_byte) = wdoccs_string_record then -- node is a leaf if (srl := wo_length(rec)) > wos_hi_lim then print("EXCESSIVE STRING LENGTH DETECTED IN NODE ",hexify(rec)," ",hexify(contents)); return false; end if; return true; -- node is ok end if; -- otherwise we proceed recursively return forall j in [1..num_childr(rec)] | check_leaves_in(wo_voc(rec,j)); procedure check_leaves_in(rec); -- inner workhorse if (contents := dr_load(rec))(type_byte) = wdoccs_string_record then -- node is a leaf if (srl := wo_length(rec)) > wos_hi_lim then print("EXCESSIVE STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents)); return false; end if; if srl < wos_low_lim then print("INSUFFICIENT STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents)); return false; end if; return true; -- leaf is ok end if; -- otherwise we proceed recursively return forall j in [1..num_childr(rec)] | check_leaves_in(wo_voc(rec,j)); end check_leaves_in; end check_leaves; procedure big_string_class_test; -- test program for big_string class the_stg := big_string(s1 := orig); print("string length and selfstr: ",#the_stg = #s1," ",str(the_stg) = s1); print("short slice class: ",the_stg(5..95) = s1(5..95) and check_leaves(the_stg___rec)); check_mem("memcheck 31",the_stg___rec); the_stg := big_string(s1 := orig + orig3); print("long slice class: ",the_stg(5..130) = s1(5..130) and check_leaves(the_stg___rec)); check_mem("memcheck 32",the_stg___rec); the_stg := big_string(s1 := 5 * orig2 + orig3); print("longer slice class: ",the_stg(5..630) = s1(5..630) and check_leaves(the_stg___rec)); check_mem("memcheck 33",the_stg___rec); the_stg := big_string(s1 := 5 * orig); print("longer slice class 2: ",the_stg(5..620) = s1(5..620) and check_leaves(the_stg___rec)); check_mem("memcheck 34",the_stg___rec); the_stg := big_string(s1 := 2 * orig + orig3); print("longer slice class 3: ",the_stg(5..255) = s1(5..255) and check_leaves(the_stg___rec)); check_mem("memcheck 35",the_stg___rec); the_stg := big_string(""); the_stg(1..0) := 5 * "abcde"; print("insert from null: ",str(the_stg) = 5 * "abcde"); the_stg(26..25) := 5 * orig; print("tail insert from null: ", str(the_stg) = (5 * "abcde" + 5 * orig) and check_leaves(the_stg___rec)); -- check_mem("memcheck 36",the_stg___rec); the_stg := big_string(ori := ori_copy := 5 * "abcde" + 5 * orig); --print("the_stg after creation: ",str(the_stg)," --- the_stg: ",the_stg); -- ?????????? SETL PRINT BUG ?????????? --print("the_stg after creation: ",str(the_stg)); -- THIS WORKS OK; PREVIOUS LINE DOES NOT print("string length, selfstr, length check,selfstr check: ",#the_stg = #ori," ",str(the_stg) = ori); stg_copy := the_stg; incref(the_stg___rec,1); -- note that an extra copy has been created print("slice extraction and original: ",the_stg(1..10) = ori(1..10) and check_leaves(the_stg___rec)); print("second slice extraction and original: ",the_stg(10..20) = ori(10..20)); print("slice extraction check: ",the_stg(1..10) = ori(1..10)); print("second slice extraction check: ",the_stg(10..20) = ori(10..20)); print("tail slice check: ",the_stg(10..#ori) = ori(10..#ori)); the_stg(1..0) := (extra := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."); ori2 := ori; ori(1..0) := extra; print("concatenation of 50 characters at start: ",#the_stg," ",str(the_stg) = ori and check_leaves(the_stg___rec)); print("the_stg: ",str(the_stg)); print(); print(str(stg_copy)); incref(the_stg___rec,-1); -- delete the string -- we must try long and short pure insertions, long and short impure insertions -- into a single section, and long and short insertions which erase longer -- runs the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(1..0) := "XXX"; the_stg(1..0) := "XXX"; print("insertion of 3 characters at start: ",#the_stg," ",str(the_stg) = ori); -- pure short insertion case incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(21..20) := "XXX"; the_stg(21..20) := "XXX"; print("insertion of 3 characters after position 20: ",#the_stg," ",str(the_stg) = ori); -- pure short insertion case incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; the_stg(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; print("50 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori); incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; the_stg(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; ori(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; print("50 Xs replace chars 10 thru 400: ",#the_stg," ",str(the_stg) = ori); --the_stg.print_raw(); incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(10..12) := "XXX"; the_stg(10..12) := "XXX"; print("3 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori); incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(150..625) := "XXX"; the_stg(150..625) := "XXX"; print("3 Xs replace chars 150 thru 625: ",#the_stg," ",str(the_stg) = ori); incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(10..400) := "XXX"; the_stg(10..400) := "XXX"; print("3 Xs replace chars 10 thru 40: ",#the_stg," ",str(the_stg) = ori); incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(10..12) := ""; the_stg(10..12) := ""; print("chars 10 thru 12 deleted: ",#the_stg," ",str(the_stg) = ori); -- pure deletion case incref(the_stg___rec,-1); -- delete the string the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(10..400) := ""; the_stg(10..400) := ""; print("chars 10 thru 40 deleted: ",#the_stg," ",str(the_stg) = ori); -- pure long deletion case incref(the_stg___rec,-1); -- delete the string print("first character deletion test"); ori := ori_copy; the_stg1 := stg_copy; incref(stg_copy___rec,1); the_stg1(1..1) := ""; ori(1..1) := ""; print(str(the_stg1) = ori); the_stg2 := the_stg1; incref(the_stg1___rec,1); the_stg2(1..1) := ""; ori(1..1) := ""; print(str(the_stg2) = ori); the_stg3 := the_stg2; incref(the_stg2___rec,1); the_stg3(1..1) := ""; ori(1..1) := ""; print(str(the_stg3) = ori); incref(the_stg3___rec,-1); incref(the_stg2___rec,-1); incref(the_stg1___rec,-1); -- delete the string copy check_mem("memcheck 37",stg_copy___rec); -- final erasure end big_string_class_test; end test;