Personal tools
You are here: Home Projects SETL SETL2 Source code DB_bs_wdoc.stl
Document Actions

DB_bs_wdoc.stl

by Paul McJones last modified 2021-02-25 11:21

"Specialized big string for storing word occurence lists."

--file Big_stg_for_wdoc.stl  
package big_stg_for_wdoc_pak;		-- specialized big string for storing word occurence lists

const bswo_code_pts := {"set_copy", "set_simple", "not_dangerous", "share_left", "join_left", "share_right", 
		"join_right", "use_leaf", "must_copy", "simple_in", "not_split", "split", "nosplit_in", "split_in"};
				-- code points to be traversed

	procedure bswo_from_stg(stg);			-- create a big string from a string
	procedure stg_from_bswostg(rec);		-- make a string from a bswostg
	procedure bswo_comp(rec,i);				-- get i-th component
	procedure bswo_set_comp(rw rec,i,stg);	-- component assignment operation
	procedure bswo_insert(rw rec,i,stg);	-- component insertion operation; if i = OM then insertion at end
												
												-- (PUBLIC FOR DEBUGGING ONLY)
	procedure wo_voc(rec,j);				-- j'th member of vector of children for B_tree_for_bigstring records
	procedure bswo_length(rec);				-- total length of a bswostg

end big_stg_for_wdoc_pak;

package body big_stg_for_wdoc_pak;		-- specialized big string for storing word occurence lists

use setldb,byteutil,disk_records_pak,db_records,B_tree_for_wdocstring,string_utility_pak;

procedure bswo_from_stg(stg);					-- make a big_string from a string

												-- we first chop up the string into a tuple of string records
	if #(pieces_tup := chop_up(stg)) = 1 then return make_record(pieces_tup(1)); end if;
						-- see comment on the 'chop_up' routine, below. If the whole string fits in 
						-- just one string record, we simply return that record.
--print("pieces_tup: ",[hexify(piece): piece in pieces_tup]); 
	return wo_make_from_tuple(pieces_tup);		-- otherwise convert the list of sections returned into a tree

end bswo_from_stg;

procedure stg_from_bswostg(rec);					-- make a string from a big_string

	if dr_load(rec)(type_byte) = wdoccs_string_record then return wo_slice(rec,1,wo_length(rec)); end if;
		-- get string from record

	if not dr_is_compound(rec) then 			-- concatenate strings from records
		return  "" +/ [wo_slice(ch,1,wo_length(ch)): j in [1..num_childr(rec)] | (ch := wo_voc(rec,j)) /= OM];
	end if;

	return "" +/ [stg_from_bswostg(wo_voc(rec,j)): j in [1..num_childr(rec)]];	-- else proceed recursively
	
end stg_from_bswostg;

procedure chop_up(stg);		-- chop the concatenation of three strings into a tuple of string records

	-- if all three pieces fit into one, two or three sections, return these. Otherwise join as much as possible of the 
	-- middle into two pieces, and then the reminder of the middle into approximately equal-sized pieces.
	-- first and lat are assumed to be no more than one record size each

	if (ns := #stg)/4 <= wos_hi_lim then return [stg]; end if; 		-- put all into one piece
	
	psm1 := 4 * wos_hi_lim - 1;
	
	if (extra_part := ns mod (4 * wos_hi_lim)) = 0 then		-- fits into a list of full pieces
	
		return [stg(j..j + psm1): j in [1,4 * wos_hi_lim + 1..ns - psm1]];
	
	else												-- there is a bit left over

		start_last_2 := ns - psm1 - extra_part;			-- starting character of last 2 sections
		mid_last_2 := start_last_2 + 4 * ((4 * wos_hi_lim + extra_part)/8) - 1;
							-- middle character of last 2 sections
						
											-- use list of all but 1 full piece, plus two smaller pieces.
		return [stg(j..j + psm1): j in [1,4 * wos_hi_lim + 1..ns - 2 * psm1 - extra_part]]
			 + [stg(start_last_2..mid_last_2),stg(mid_last_2 + 1..)];
	
	end if;
	
end chop_up;

procedure bswo_comp(rec,i);			-- component extraction operation. the 'node' can be either 
									-- the top of a tree, or can be a simple wdoccs_string_record
--print("bswo_comp: ",i);	
	if i < 1 then 
		abort("Illegal parameters in record id extraction operation "+ str(i));
	end if;
			
	if dr_load(rec)(type_byte) = wdoccs_string_record then		-- simple string record case

		if i > (sl := wo_length(rec)) then 				-- get string from record
			abort("Illegal item index in record id extraction operation: " + str(i) + ", " + str(sl));
		end if;
--print("bswo_comp bottom: ",i," ",wo_slice(rec,i,i));		
		return wo_slice(rec,i,i); 

	end if;			-- otherwise we have the tree case

							-- get the child containing the first character past i 
	if not (exists chixi in [1..nc := num_childr(rec)] | wo_get_ch_cum(rec,chixi) >= i) then 
		abort("Illegal component index in string extraction operation: " + str(i) 
											+ ", length is only " + str(wo_get_ch_cum(rec,nc)));
	end if;
	
	prev_cumi := if chixi = 1 then 0 else wo_get_ch_cum(rec,chixi - 1) end if;	-- the cumulant previous to i
	
	return bswo_comp(wo_voc(rec,chixi),i - prev_cumi);		-- proceed recursively

end bswo_comp;

procedure wo_voc(rec,j);		-- j'th member of vector of children for B_tree_for_wdocstring records

	stg := dr_load(rec);					-- load this string
	cjstrt := (j - 1) * 4 + wo_ch_start; 
	return stg(cjstrt..cjstrt + 3);		-- return child rec
	
end wo_voc;

procedure bswo_length(rec);				-- total length of a bswostg
	return if dr_load(rec)(type_byte) = wdoccs_string_record then wo_length(rec) else wo_get_cum(rec) end if;
end bswo_length;

procedure wo_get_ch_cum(rec,j);	-- get the cum value for the j-th child of this node
	
	return int_of_5(dr_load(rec)(wo_cum_start + (j - 1) * 5..wo_cum_start + j * 5 - 1));
	
end wo_get_ch_cum;

procedure make_record(stg);		-- creates one record from string

	rec := set_type(dr_new_rec(),wdoccs_string_record); wo_set_slice(rec,1,0,stg);
	return rec;
	
end make_record;

procedure pass_pass(stg); print(stg); end pass_pass;			-- debug print

procedure bswo_set_comp(rw rec,i,stg);		-- component assignment operation; if stg = OM then have deletion
--print("bswo_set_comp: ",i," ",stg," ",abs(dr_load(rec)(type_byte))," ",type(stg));
											-- since rec will change, ensure that it has just one copy. 
											-- This will up the refcounts of its children if necessary

	if refcount(int_of_4(rec)) > 1 then						-- must copy
pass("set_copy");
		stgg := dr_load(rec); new_r := dr_new_rec(); 
		dr_setrecbuf(new_r,stgg);
		dr_dirtify(new_r);
		increfs(new_r,1); incref(rec,-1); rec := new_r;		-- substitute copy for original
	end if;

	if (contents := dr_load(rec))(type_byte) = wdoccs_string_record then		-- simple string record case
pass("set_simple");
		if i < 1 or i > wo_length(rec) then 
			abort("Illegal parameter in string assignment operation" + str(i));
		end if;

		wo_set_slice(rec,i,i,stg?"");		-- if stg = OM then have deletion		

		return;
		
	end if;		-- otherwise we deal with the B-tree case

	if i > (len_this := wo_get_cum(rec)) + 1 then 
		abort("Out of range parameter in string assignment operation " + str(i) + " Length is only " + str(len_this));
	end if;
	
	[leafi,int_cum,-] := wo_comp_cum(rec,i);		-- get the leaf addressed
	prev_cum := int_cum - wo_length(leafi);	 		-- the integer cumulant value just prior to the leaf
	old_leafi := leafi;					-- save the old version of the leaf, for refcount manipulation

	incref(old_leafi,1);							-- delay eliminating reference to this leaf
--print("type(stg) before: ",type(stg));			-- APPARENT BUG **********
	stg_not_om := stg /= OM;						-- workaround for APPARENT BUG **********
	wo_set_slice(leafi,i - prev_cum,i - prev_cum,stg?"");
							-- replace slice; leafi is copied, its old version will lose 1 ref
	
--print("type(stg): ",type(stg));				-- APPARENT BUG **********
	if (lni := wo_length(leafi)) > wos_low_lim or stg_not_om then 	-- not a dangerous deletion, so we can finish easily
--print("stg: ",stg," ",lni," ",wos_low_lim," ",type(stg));
pass("not_dangerous");
		wo_set_comp(rec,i,leafi);	-- reset the leaf of rec, since it might have changed
		incref(leafi,-1);			-- the variable leafi is now dead (compensate for extra ref added by set_comp)
		return;						-- done with this case

	end if;
				-- otherwise we must either share or join with right or left neighbor leaf
	
	if prev_cum > 1 then					-- use left neighbor leaf

		naybl := wo_length(nayb_leaf := wo_comp(rec,prev_cum));
		nayb_stg := wo_slice(nayb_leaf,1,naybl);			-- get the two strings
		li_stg := wo_slice(leafi,1,lni);

		old_nayb_leaf := nayb_leaf;			-- save the old versions of the leaves, for refcount manipulation
		old_leafi := leafi;	
		
		if naybl > wos_low_lim then		-- share with left
pass("share_left");
			half := (lni + naybl)/2;				-- part of the string that will remain with left neighbor
			incref(old_nayb_leaf,1);				-- delay eliminating reference to the leaf
			wo_set_slice(nayb_leaf,1,naybl,nayb_stg(1.. 4 * half));
					-- nayb_leaf has changed, old_nayb_leaf has lost one ref, which nayb_leaf carries

			wo_set_slice(leafi,1,lni,nayb_stg(4 * half + 1..) + li_stg);
					-- leafi has changed, old_leafi has lost one ref, which leafi carries

			wo_set_comp(rec,i,leafi);				-- re-insert leafi, which has changed
			incref(leafi,-1);			-- the variable leafi is now dead (compensate for extra ref added by set_comp)

			wo_set_comp(rec,prev_cum,nayb_leaf);			-- re-insert nayb_leaf, which has changed
			incref(nayb_leaf,-1);							-- compensate for extra ref added by set_comp

		else																			-- join
pass("join_left");
			incref(old_nayb_leaf,1);				-- delay eliminating reference to the leaf
			wo_set_slice(nayb_leaf,1,naybl,nayb_stg + li_stg);              -- nayb_leaf has changed from old_nayb_leaf
--print("wo_set_slice: ",i," ",num_childr(rec)," ",hexify(nayb_stg + li_stg)," ",hexify(nayb_stg)," ",hexify(li_stg));
			wo_set_comp(rec,i,OM);		-- delete leaf i, which is no longer used; it loses one ref
			incref(old_leafi,-1);							-- it loses one ref

			wo_set_comp(rec,prev_cum,nayb_leaf);		-- insert the changed neighbor leaf
			incref(nayb_leaf,-1);							-- compensate for extra ref added by set_comp
			
		end if;
		
	elseif i < len_this then		-- use right neighbor leaf

		naybl := wo_length(nayb_leaf := wo_comp(rec,int_cum + 1));
		nayb_stg := wo_slice(nayb_leaf,1,naybl);			-- get the two strings
		li_stg := wo_slice(leafi,1,lni);

		old_nayb_leaf := nayb_leaf;			-- save the old versions of the leaves, for refcount manipulation
		old_leafi := leafi;

		if naybl > wos_low_lim then		-- share with right
pass("share_right");
			half := (lni + naybl)/2;				-- part of the string that will remain with left neighbor
			incref(old_nayb_leaf,1);						-- delay eliminating reference to the leaf
			wo_set_slice(nayb_leaf,1,naybl,nayb_stg(4 * (half - lni) + 1..));
					-- if nayb_leaf has changed, old_nayb_leaf has lost one ref, which nayb_leaf carries

			wo_set_slice(leafi,1,lni,li_stg + nayb_stg(1..4 * (half - lni)));
					-- leafi has changed, old_leafi has lost one ref, which leafi carries

			wo_set_comp(rec,i,leafi);				-- re-insert leaf i
			incref(leafi,-1);			-- the variable leafi is now dead (compensate for extra ref added by set_comp)

			wo_set_comp(rec,int_cum + 1 + half - lni,nayb_leaf);	-- (note change in cumulant index of this leaf)
						-- re-insert the left neighbor leaf, which will have changed
			incref(nayb_leaf,-1);			-- to compensate for the additional ref which nayb_leaf has gained

		else																			-- join
pass("join_right");
			incref(old_nayb_leaf,1);							-- note this additional reference to the leaf
			wo_set_slice(nayb_leaf,1,naybl,li_stg + nayb_stg);
					-- nayb_leaf has changed, and old_nayb_leaf has lost one ref, which nayb_leaf carries

			wo_set_comp(rec,i,OM);		-- delete leaf i, which is no longer used; it loses one ref
			incref(old_leafi,-1);							-- it loses one ref
			
			wo_set_comp(rec,i,nayb_leaf);	-- (note change in cumulant index of this leaf)
						-- re-insert the left neighbor leaf, which will have changed
			incref(nayb_leaf,-1);			-- to compensate for the additional ref which nayb_leaf has gained

		end if;
	
	else							-- no neighbor; replace tree by leaf
pass("use_leaf");		
		rec := leafi; inc_refs(rec,-1);		-- the tree, which was copied, can be erased
		
	end if;
	
--print("deletion: ");	

end bswo_set_comp;

procedure bswo_insert(rw rec,i,stg);	-- component insertion operation; if i = OM then insertion at end
	if refcount(int_of_4(rec)) > 1 then						-- must copy
pass("must_copy");
		stgg := dr_load(rec); new_r := dr_new_rec(); 
		dr_setrecbuf(new_r,stgg);
		dr_dirtify(new_r);
		increfs(new_r,1); incref(rec,-1); rec := new_r;		-- substitute copy for original
	end if;

	if dr_load(rec)(type_byte) = wdoccs_string_record then		-- simple string record case
pass("simple_in");		
		rl := wo_length(rec);
		
		i ?:= (rl + 1);		-- if i = OM then insertion at end
		 
		if i < 1 or i > rl + 1 then 
			abort("Illegal parameter in string insertion operation" + str(i));
		end if;

		if rl < wos_hi_lim then		-- need not split
pass("not_split");
			wo_set_slice(rec,i,i - 1,stg);		-- make the insertion		
			return;		-- done with this case
		end if;			-- otherwise we must split this leaf into a tree
pass("split");		
		contents := wo_slice(rec,1,wo_length(rec));		-- get the string contents of the record
		contents(4 * i - 3..4 * i - 4) := stg;	-- make the insertion into aux string
		half := (rl + 1)/2;		-- number of occurences to remain in leaf

		new_stg := dr_load(newl := dr_new_rec()); new_stg(type_byte) := wdoccs_string_record; -- create a new leaf
		dr_setrecbuf(newl,new_stg);
		dr_dirtify(newl);
		wo_set_slice(newl,1,0,contents(1..4 * half));	-- leaf gets left half of the children
		
		old_rec := rec; incref(old_rec,1);		-- delay change in this record
		wo_set_slice(rec,1,rl,contents(4 * half + 1..));	-- set slice of wd occurence record (keeps right half)
		incref(old_rec,-1);		-- end delay of change in this record

		stgg := dr_load(newt := dr_new_rec()); stgg(type_byte) := wdoccs_str_node_ncr; -- create a new tree
		dr_setrecbuf(newt,stgg);
		dr_dirtify(newt);
		wo_insert(newt,1,rec); wo_insert(newt,1,newl); 		-- insert the two components (at start)
		incref(rec,-1); incref(newl,-1); 		-- compensate for the extra refs introduced by the insertions
		
		rec := newt;		-- substitute the tree for the original leaf

		return;							-- done with this case
		
	end if;		-- otherwise we deal with an insertion into a B_tree
	
	len_this := wo_get_cum(rec);
	orig_i := i;				-- save, since may need to inset at end
	i ?:= (len_this + 1);		-- if i = OM then insertion at end

	if i > len_this + 1 then 
		abort("Out of range parameter in insertion operation " + str(i) + " Length is only " + str(len_this));
	end if;

	[leafi,int_cum,-] := bb:=wo_comp_cum(rec,i min len_this);
						 	-- this returns the triple [leaf,int_cum_of_leaf,second_cum_of_leaf]
	prev_cum := int_cum - wo_length(leafi);	 		-- the integer cumulant value just prior to the leaf
	
	old_leafi := leafi;			-- save the old version of the leaf, for refcount manipulation
	
	if (rl := wo_length(leafi)) < wos_hi_lim then		-- get the leaf addressed, and its length
pass("nosplit_in");
		incref(old_leafi,1);		-- prevent old_leafi from losing its last ref too early
		wo_set_slice(leafi,i - prev_cum,i - prev_cum - 1,stg);
						-- make an insertion into the slice; if leafi is copied, its old version will lose 1 ref
		wo_set_comp(rec,orig_i,leafi);	-- reset the leaf of rec, since it might have changed
		incref(leafi,-1);			-- the variable leafi is now dead (compensate for extra ref added by set_comp)
		return;						-- done with this case
		
	end if;				-- otherwise we must split
pass("split_in");
	contents := wo_slice(leafi,1,wo_length(leafi));				-- get the present contents of the leaf

	imp := i - prev_cum;			-- get the relative location of the insertion point in its string		
	contents(4 * imp - 3..4 * imp - 4) := stg;	-- make the insertion into aux string
	half := (rl + 1)/2;		-- number of occurences to remain in leaf

	new_stg := dr_load(newl := dr_new_rec()); new_stg(type_byte) := wdoccs_string_record; -- create a new leaf
		dr_setrecbuf(newl,new_stg);
		dr_dirtify(newl);
	wo_set_slice(newl,1,0,contents(1..4 * half));	-- new leaf gets left half of the children
	
	incref(old_leafi,1);		-- pervent old_leafi from losing its last ref too early
	wo_set_slice(leafi,1,rl,contents(4 * half + 1..));	-- set slice of leafi to remaining occs
				-- if leafi is copied, its old version will lose 1 ref


	wo_set_comp(rec,if orig_i = OM then i - 1 else i end if,leafi);	-- reset the leaf of rec, since it might have changed
	incref(leafi,-1);			-- the variable leafi is now dead (compensate for extra ref added by set_comp)

	wo_insert(rec,prev_cum + 1,newl); 		-- insert the additional leaf just before the existing one
	incref(newl,-1); 						-- compensate for the extra ref introduced by the insertion
--print("tree_dump after insert: ",str(wo_dump(rec)));

end bswo_insert;

end big_stg_for_wdoc_pak;

program test;		-- test program for big_string package and its associated class
use setldb,byteutil,big_stg_for_wdoc_pak;
use disk_records_pak,db_records,B_tree_for_wdocstring,string_utility_pak;

var orig := "1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.";			-- string for tests
var orig2 := "21.22.23.24.25.26.27.28.29.30.31.32.33.34.";		-- variant string for tests
var orig3:= "21.22.23.24.25.26.27.";							-- short string for tests
var orig4:= "21.22.23.24.25.26.";								-- shorter string for tests
database_file_name := "bs_test_file";           -- set the name of the file to be used

code_pts := bswo_code_pts;		-- code points to be traversed

package_tests;													-- test the underlying package

procedure check_mem(caption,rec);			-- memory check utility
	incref(rec,-1); print(caption," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if));
end check_mem;

procedure breakex(stg);		-- rearrange period-delimited string as string of 4-byte sections

	return "" +/ [tostr_of_4(piece): piece in breakup(stg,".") | piece /= ""];

	procedure tostr_of_4(stg); reads(stg,ival); return stg_of_4(ival); end tostr_of_4;

end breakex;

procedure comp_piece(stg,n);		-- return section of string representing n-th component
	return stg(4 * (n - 1) + 1..4 * n);
end comp_piece;

procedure set_comp_piece(rw stg,n,a_comp);		-- assign section of string representing n-th component
	stg(4 * (n - 1) + 1..4 * n) := a_comp;
end set_comp_piece;

procedure insert_comp_piece(rw stg,n,a_comp);		-- insert section of string representing n-th component
	stg(4 * (n - 1) + 1..4 * (n - 1)) := a_comp;
end insert_comp_piece;

procedure package_tests;		-- tests of the underlying package

no_error := true;
rc := bswo_from_stg(s1 := breakex(orig4)); 

for j in [1..60] loop
	bswo_insert(rc,1,4 * char(j)); insert_comp_piece(s1,1,4 * char(j));
	s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc));
--wo_dump(rc); print(hexify(s1)); print(hexify(s2));
end loop;
print("insertion test; 60 insertions at start: ",no_error); check_mem("memcheck 10a",rc);

no_error := true;
rc_orig := rc := bswo_from_stg(s1 := breakex(orig4)); incref(rc_orig,1);

for j in [1..60] loop
	bswo_insert(rc,OM,4 * char(j)); insert_comp_piece(s1,#s1/4  + 1,4 * char(j));
	s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc));
--wo_dump(rc); print(hexify(s1)); print(hexify(s2));
end loop;
print("insertion test; 60 insertions at end: ",no_error); incref(rc_orig,-1); check_mem("memcheck 10a",rc);

rc := bswo_from_stg(s1 := breakex(orig)); 
bswo_insert(rc,1,4 * "\x66"); insert_comp_piece(s1,1,4 * "\x66");
s2 := stg_from_bswostg(rc); print("long insertion test: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 10b",rc);

rc := bswo_from_stg(s1 := breakex(5 * orig)); 
bswo_insert(rc,1,4 * "\x66"); insert_comp_piece(s1,1,4 * "\x66");
s2 := stg_from_bswostg(rc); print("longer insertion test: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 10c",rc);

rc := bswo_from_stg(s1 := breakex(orig4)); 
bswo_set_comp(rc,3,4 * "\x66");	set_comp_piece(s1,3,4 * "\x66");	-- test component assignment operation, short form
s2 := stg_from_bswostg(rc); print("short slice assignment: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 11a",rc);

rc_orig := rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); print("prelim check: ",check_leaves(rc));
incref(rc_orig,1);
bswo_set_comp(rc,3,4 * "\x66");	set_comp_piece(s1,3,4 * "\x66");	-- test component assignment operation, long form
s2 := stg_from_bswostg(rc); print("long slice assignment: ",s1 = s2 and check_leaves(rc));
incref(rc_orig,-1); check_mem("memcheck 11b",rc);

rc := bswo_from_stg(s1 := breakex(orig4)); 
bswo_set_comp(rc,3,OM);	set_comp_piece(s1,3,"");	-- test component assignment operation, short form
s2 := stg_from_bswostg(rc); print("short slice deletion: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 11c",rc);

rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); print("prelim check: ",check_leaves(rc));
bswo_set_comp(rc,3,OM);	set_comp_piece(s1,3,"");	-- test component assignment operation, long form
s2 := stg_from_bswostg(rc); print("long slice deletion: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 11d",rc);

no_error := true;
rc := bswo_from_stg(s1 := breakex(4 * orig + orig3));
print("before deletions: ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if));

for j in [1..60] loop		-- do 60 deletions
	bswo_set_comp(rc,1,OM);	set_comp_piece(s1,1,"");	-- test component assignment operation, long form
	s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc));
--print("deletion cycle: ",j," ",s1 = s2 and check_leaves(rc)," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if));
end loop;
print("long slice, 60 deletions: ",no_error); check_mem("memcheck 11d",rc);

no_error := true;
rc := bswo_from_stg(s1 := breakex(4 * orig + orig3));

for j in [1..60] loop		-- do 60 deletions from right end
	old_rc := rc;
	bswo_set_comp(rc,bswo_length(rc),OM);	set_comp_piece(s1,#s1/4,"");	-- test component assignment operation, long form
	 if old_rc /= rc then incref(old_rc,-1); end if;                 -- throw away the old tree if copied
	s2 := stg_from_bswostg(rc); no_error and:= (s1 = s2 and check_leaves(rc));
--print("end deletion: ",j," ",s1 = s2 and check_leaves(rc)," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if," ",hexify(rc));
end loop;
print("long slice, 60 end deletions: ",no_error); check_mem("memcheck 11e",rc);

report_points_passed(); 	-- report on code points not traversed
stop;

s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(orig3))); print("short reconstruction: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 1",rc);

s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(orig))); print("reconstruction: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 1a",rc);
s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(orig2))); print("reconstruction2: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 1b",rc);
s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(5 * orig2 + orig))); print("long reconstruction: ",s1 = s2 and check_leaves(rc));
--print(hexify(s1)); 
check_mem("memcheck 2",rc);
s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(15 * orig2 + orig))); print("longer reconstruction: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 3",rc);
s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(15 * orig2))); print("longer reconstruction 2: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 4",rc);
s2 := stg_from_bswostg(rc := bswo_from_stg(s1 := breakex(20 * orig2 + orig))); print("longer reconstruction 3: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 5",rc);

no_error := true;
for j in [1..10] loop
	s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(orig)),j); no_error and:= (comp_piece(s1,j) = s2);
	if j < 10 then incref(rc,-1); end if;
end loop;
print("comps of short reconstruction: ",no_error);
check_mem("memcheck 6",rc);

no_error := true;
for j in [1..10] loop
	s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(orig + orig3)),j); no_error and:= (comp_piece(s1,j) = s2);
	if j < 10 then incref(rc,-1); end if;
end loop;
print("comps of short reconstruction: ",no_error);
check_mem("memcheck 7",rc);

no_error := true;
for j in [1..50] loop
	s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(5 * orig2 + orig3)),j); no_error and:= (comp_piece(s1,j) = s2);
	if j < 50 then incref(rc,-1); end if;
end loop;
print("comps of short reconstruction: ",no_error);
check_mem("memcheck 8",rc);

no_error := true;
for j in [1..50] loop
	s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(5 * orig)),j); no_error and:= (comp_piece(s1,j) = s2);
	if j < 50 then incref(rc,-1); end if;
end loop;
print("comps of short reconstruction: ",no_error);
check_mem("memcheck 9",rc);

no_error := true;
for j in [1..50] loop
	s2 := bswo_comp(rc := bswo_from_stg(s1 := breakex(2 * orig + orig3)),j); no_error and:= (comp_piece(s1,j) = s2);
	if j < 50 then incref(rc,-1); end if;
end loop;
print("comps of short reconstruction: ",no_error);
check_mem("memcheck 10",rc);

rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); 
bswo_set_comp(rc,3,4 * "\x66");	set_comp_piece(s1,3,4 * "\x66");	-- test component assignment operation, long form
s2 := stg_from_bswostg(rc); print("long slice shortening insert: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 11",rc);

rc := bswo_from_stg(s1 := breakex(4 * orig + orig3)); 
bswo_set_comp(rc,3,OM);	set_comp_piece(s1,3,"");	-- test component deletion operation, long form
s2 := stg_from_bswostg(rc); print("long slice shortening insert: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 21",rc);

rc := bswo_from_stg(s1 := breakex(orig + orig3)); 
bswo_set_comp(rc,3,4 * "\x66");	s1(3..2) := "XX";	-- test slice insertion operation, long form
s2 := stg_from_bswostg(rc); print("long slice insert: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 22",rc);

rc := bswo_from_stg(s1 := orig + orig3); 
bswo_set_comp(rc,3,4 * "\x66");s1(3..2) := 3 * orig2;		-- test slice insertion operation, long form
s2 := stg_from_bswostg(rc); print("long slice long insert: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 23",rc);

rc := bswo_from_stg(s1 := orig);
bswo_set_comp(rc,1,4 * "\x66");	s1(1..3) := "XX";	-- test slice assignment operation, short form
s2 := stg_from_bswostg(rc); print("short slice shrink-a-bit: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 24",rc);

rc := bswo_from_stg(s1 := orig);
bswo_set_comp(rc,3,OM); s1(1..3) := "";		-- test slice assignment operation, short form
s2 := stg_from_bswostg(rc); print("short slice shrinkage: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 25",rc);

rc := bswo_from_stg(s1 := orig);
bswo_set_comp(rc,3,"YYYYY");	s1(1..3) := "YYYYY";	-- test slice assignment operation, short form
s2 := stg_from_bswostg(rc); print("short slice expand: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 26",rc);

rc := bswo_from_stg(s1 := orig);
bswo_insert(rc,1,"ZZZZZ");	s1(1..0) := "ZZZZZ";	-- test slice assignment operation, short form
s2 := stg_from_bswostg(rc); print("short slice insert: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 27",rc);

rc := bswo_from_stg(s1 := orig + orig3); rl := bs_length(rc);
bswo_insert(rc,OM,"XX"); s1(rl + 1..rl) := "XX";		-- test slice append operation, short form
s2 := stg_from_bswostg(rc); print("long slice append: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 28",rc);

rc := bswo_from_stg(s1 := 2 * orig + orig3); rl := bs_length(rc);
bswo_insert(rc,OM,"WWWWWWW"); s1(rl + 1..rl) := "WWWWWWW";		-- test slice append operation, long form
s2 := stg_from_bswostg(rc); print("long slice append 2: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 29",rc);

rc := bswo_from_stg(s1 := 5 * orig2 + orig3); rl := bs_length(rc);
bswo_insert(rc,OM,"YYYYY"); s1(rl + 1..rl) := "YYYYY";		-- test slice append operation, longer form
s2 := stg_from_bswostg(rc); print("long slice append 3: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 30",rc);

rc := bswo_from_stg(s1 := 5 * orig2 + orig3); rl := bs_length(rc);
bswo_insert(rc,OM,3 * orig); s1(rl + 1..rl) := 3 * orig;		-- test slice append operation, longer form
s2 := stg_from_bswostg(rc); print("long slice long append: ",s1 = s2 and check_leaves(rc));
check_mem("memcheck 30A",rc);

end package_tests;

procedure check_leaves(rec);		-- checks that leaves have required maximum and minimum lengths

	if (contents := dr_load(rec))(type_byte) = wdoccs_string_record then	-- node is a leaf

		if (srl := wo_length(rec)) > wos_hi_lim then 
			print("EXCESSIVE STRING LENGTH DETECTED IN NODE ",hexify(rec)," ",hexify(contents));
			return false;
		end if;
		
		return true;		-- node is ok
	end if;		-- otherwise we proceed recursively
	
	return forall j in [1..num_childr(rec)] | check_leaves_in(wo_voc(rec,j));

procedure check_leaves_in(rec);		-- inner workhorse

	if (contents := dr_load(rec))(type_byte) = wdoccs_string_record then	-- node is a leaf

		if (srl := wo_length(rec)) > wos_hi_lim then 
			print("EXCESSIVE STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents));
			return false;
		end if;

		if srl < wos_low_lim then 
			print("INSUFFICIENT STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents));
			return false;
		end if;

		return true;		-- leaf is ok

	end if;		-- otherwise we proceed recursively

	return forall j in [1..num_childr(rec)] | check_leaves_in(wo_voc(rec,j));

end check_leaves_in;

end check_leaves;

procedure big_string_class_test;		-- test program for big_string class

the_stg := big_string(s1 := orig);
print("string length and selfstr: ",#the_stg = #s1," ",str(the_stg) = s1);
print("short slice class: ",the_stg(5..95) = s1(5..95) and check_leaves(the_stg___rec));
check_mem("memcheck 31",the_stg___rec);

the_stg := big_string(s1 := orig + orig3);
print("long slice class: ",the_stg(5..130) = s1(5..130) and check_leaves(the_stg___rec));
check_mem("memcheck 32",the_stg___rec);

the_stg := big_string(s1 := 5 * orig2 + orig3);
print("longer slice class: ",the_stg(5..630) = s1(5..630) and check_leaves(the_stg___rec));
check_mem("memcheck 33",the_stg___rec);

the_stg := big_string(s1 := 5 * orig);
print("longer slice class 2: ",the_stg(5..620) = s1(5..620) and check_leaves(the_stg___rec));
check_mem("memcheck 34",the_stg___rec);

the_stg := big_string(s1 := 2 * orig + orig3);
print("longer slice class 3: ",the_stg(5..255) = s1(5..255) and check_leaves(the_stg___rec));
check_mem("memcheck 35",the_stg___rec);

the_stg := big_string("");
the_stg(1..0) := 5 * "abcde"; print("insert from null: ",str(the_stg) = 5 * "abcde"); 
the_stg(26..25) := 5 * orig; print("tail insert from null: ",
				str(the_stg) = (5 * "abcde" + 5 * orig) and check_leaves(the_stg___rec)); --
check_mem("memcheck 36",the_stg___rec);

the_stg := big_string(ori := ori_copy := 5 * "abcde" + 5 * orig);
--print("the_stg after creation: ",str(the_stg)," --- the_stg: ",the_stg); -- ?????????? SETL PRINT BUG ??????????
--print("the_stg after creation: ",str(the_stg)); -- THIS WORKS OK; PREVIOUS LINE DOES NOT
print("string length, selfstr, length check,selfstr check: ",#the_stg = #ori," ",str(the_stg) = ori);
stg_copy := the_stg; incref(the_stg___rec,1);		-- note that an extra copy has been created

print("slice extraction and original: ",the_stg(1..10) = ori(1..10) and check_leaves(the_stg___rec)); 

print("second slice extraction and original: ",the_stg(10..20) = ori(10..20));
print("slice extraction check: ",the_stg(1..10) = ori(1..10)); 
print("second slice extraction check: ",the_stg(10..20) = ori(10..20));
print("tail slice check: ",the_stg(10..#ori) = ori(10..#ori));

the_stg(1..0) := (extra := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."); ori2 := ori;
ori(1..0) :=  extra;
print("concatenation of 50 characters at start: ",#the_stg," ",str(the_stg) = ori and check_leaves(the_stg___rec)); 

print("the_stg: ",str(the_stg)); print(); print(str(stg_copy));
incref(the_stg___rec,-1);		-- delete the string

	-- we must try long and short pure insertions, long and short impure insertions 
	-- into a single section, and long and short insertions which erase longer 
	-- runs
the_stg := stg_copy; incref(stg_copy___rec,1); ori := ori_copy; ori(1..0) := "XXX";
the_stg(1..0) := "XXX"; print("insertion of 3 characters at start: ",#the_stg," ",str(the_stg) = ori);
		-- pure short insertion case
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(21..20) := "XXX";
the_stg(21..20) := "XXX"; print("insertion of 3 characters after position 20: ",#the_stg," ",str(the_stg) = ori);		-- pure short insertion case
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
the_stg(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; 
print("50 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; 
the_stg(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";  ori(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
print("50 Xs replace chars 10 thru 400: ",#the_stg," ",str(the_stg) = ori);		--the_stg.print_raw();
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(10..12) := "XXX";
the_stg(10..12) := "XXX"; print("3 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(150..625) := "XXX";
the_stg(150..625) := "XXX"; print("3 Xs replace chars 150 thru 625: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(10..400) := "XXX";
the_stg(10..400) := "XXX"; print("3 Xs replace chars 10 thru 40: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(10..12) := "";
the_stg(10..12) := ""; print("chars 10 thru 12 deleted: ",#the_stg," ",str(the_stg) = ori);		-- pure deletion case
incref(the_stg___rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy___rec,1); ori := ori_copy; ori(10..400) := "";
the_stg(10..400) := ""; print("chars 10 thru 40 deleted: ",#the_stg," ",str(the_stg) = ori);		-- pure long deletion case
incref(the_stg___rec,-1);			-- delete the string

print("first character deletion test"); ori := ori_copy; 
the_stg1 := stg_copy; incref(stg_copy___rec,1); the_stg1(1..1) := ""; ori(1..1) := ""; print(str(the_stg1) = ori);
the_stg2 := the_stg1; incref(the_stg1___rec,1); the_stg2(1..1) := ""; ori(1..1) := ""; print(str(the_stg2) = ori);
the_stg3 := the_stg2; incref(the_stg2___rec,1); the_stg3(1..1) := ""; ori(1..1) := ""; print(str(the_stg3) = ori);
incref(the_stg3___rec,-1); incref(the_stg2___rec,-1); incref(the_stg1___rec,-1); 
 		-- delete the string copy

check_mem("memcheck 37",stg_copy___rec);		-- final erasure

end big_string_class_test;

end test;

« November 2024 »
Su Mo Tu We Th Fr Sa
1 2
3 4 5 6 7 8 9
10 11 12 13 14 15 16
17 18 19 20 21 22 23
24 25 26 27 28 29 30
 

Powered by Plone CMS, the Open Source Content Management System

This site conforms to the following standards: