\ hs 07.11.95
\ Generates an array containing a sorted list of the addresses of all the
\ words in the dictionary.

\ sort-dictionary ( -- )	generates the list
\ word-array      ( -- adr )	address of the array of word addresses
\ #words          ( -- n )	the number of words in the list
\ word-index      ( adr -- i )	finds the index within the word address
\				array of the largest address which is less
\				than adr

needs heap-sort lib/sort.fth

only forth also hidden also  forth definitions

0 constant word-array
hidden definitions
0 constant next-location

forth definitions
: #words	( -- n )
	next-location word-array -  /cell /  1-  ;

hidden definitions
: new-node	( acf -- )
	next-location !
	next-location  cell+  is next-location ;
: count-words	( -- #words )
	\ Count the total number of words in the dictionary.
	0 voc-link link@
	begin
		dup voc> >threads  follow
		begin	another?
		while	drop  ( cnt voc-link )  swap 1+ swap
		repeat
		link@ dup origin =
	until   ( #words link )  drop ;
: allocate-array  ( -- )
	\ One extra slot for heap sort temporary entry, one slot for origin
	count-words  2+ cells	( #bytes )
	alloc-mem  ( adr )
	dup is word-array  is next-location
	0  new-node
	origin new-node ;

\ These 2 routines account for half of the total time for the sort,
\ and they are very simple, so we should implement them in code.
: dsort-test	( i j -- flag )
	word-array swap cells+ @  word-array rot cells+ @  < ;
: dsort-copy	( i j -- )
	word-array rot cells+ @  word-array rot cells+ ! ;
: adr@		( index -- )
	word-array swap 1+ cells+ @  ;

forth definitions
\ Generates an array containing the addresses of all the words in the
\ dictionary, sorted in ascending order of the word's address.
: sort-dictionary	( -- )
	[""] _!@#end_ "create	\ Dummy word to mark the top of the dictionary
	allocate-array
		['] dsort-test  is rec-test
		['] dsort-copy  is rec-copy
	voc-link link@
	begin
		dup voc> >threads  follow
		begin  another?  while  name> new-node  repeat
		link@ dup origin =
	until   drop
	#words  heap-sort ;


\ Finds the index within the word table of the last word whose address
\ is <= the indicated adr.
: word-index	( adr -- n )
	\ Binary search
	#words 0			( adr high low )
	begin	2dup 1+ >
	while				( adr high' low' )
		2dup - 2/ over +	( adr high low test )
		dup adr@		( adr high low test test-adr )
		4 pick >
		if			( adr high low test )
			rot drop swap	( adr test low )
		else			( adr high low test )
			nip		( adr high test )
		then			( adr high' low' )
	repeat				( adr high' low' )
	nip nip ;			( low )
