\ Forthmacs simple database
\ hs 19.11.97
only forth also  system also  hidden also definitions  decimal
needs heap-sort		lib.sort
variable tempidx
0 constant idxfd
0 constant datafd
0 constant idxoff
0 constant idxrecsize
0 constant idxbuff1
0 constant idxbuff2
0 constant idxitemsize
0 constant idxbuffer
0 constant idxdatabuffer
defer comp-op

: >db-items	( descr -- *it )	; immediate
: >db-recsize	( descr -- *#rec )	cell+ ;
: >db-fd	( descr -- *fd )	2 cells+ ;
: >db-recs	( descr -- *records)	3 cells+ ;
: >db-definer	( descr -- *definerpfa)	4 cells+ ;
: >db-indexes	( descr -- arrayaddr )	5 cells+ ;
: >db-idxfds	( item descr -- addr )	swap cells+ 256 + ;
: ?db-open	( descr -- )		dup @ =  if -400 throw then ;
: ?db-closed	( descr -- )		dup @ <> if -401 throw then ;
: item>pos	( n descr -- offset )	swap 44 * + 512 + ;
: 0datapars	( n -- )		0 ?do 2drop loop ;
: >db-recoff	( rec descr -- off )	>r  dup 0 r@  >db-recs @ within if r> >db-recsize @ * else -402 throw then ;
: get-item-info	( item descr -- name off siz type ) item>pos dup >r 2 cells+  r@ 2@  r> 40 + @ ;
: ?allocate	( size -- addr )	allocate if -279 throw then ;
: ?free		( addr -- )		free if -327 throw then ;
: make&type	( name type -- )	locals| type fname | fname make 0= if -273 throw then  type fname f_type drop ;
: ?fopen	( name -- fd )		modify fopen ?dup 0= if -38 throw then ;
: ?(md		( path -- )		(md if -601 throw then ;
: idx-valid	( item descriptor )	>db-indexes bitset ;
: idx-invalid	( item descriptor )	>db-indexes bitclear ;
: clear-all-idx	( descriptor )		>db-indexes 8 erase ;

forth definitions
\ operater tools for work in database
: items#	( descr -- items )	>db-items @ ;
: recsize#	( descr -- recordsize )	>db-recsize @ ;
: records#	( descr -- records )	>db-recs @ ;
: ?find-item	( str descriptor -- item# true | false )
	dup items# locals| items descriptor str |  descriptor ?db-open false
	items 0 ?do i descriptor item>pos 2 cells+ str "= if drop i true leave then loop ;
: get-record		\ ( record addr descriptor -- addr )
	dup >db-fd @ locals| fd descriptor addr record |
	record descriptor >db-recoff fd fseek  addr  descriptor recsize# fd fgets drop  addr ;
: store-record		\ ( record addr descriptor -- )
	dup >db-fd @ locals| fd descriptor addr record |
	record descriptor >db-recoff fd fseek  addr descriptor recsize# fd fputs
	descriptor clear-all-idx ;
: swap-records		\ ( record1 record2 descriptor  -- )
	dup >db-fd @ 0 0 0 0 locals| pos1 pos2 recsize buff fd descriptor record2 record1 |
	descriptor recsize# dup is recsize  2* ?allocate is buff
	record1 descriptor >db-recoff dup is pos1 fd fseek	buff		recsize fd fgets drop
	record2 descriptor >db-recoff dup is pos2 fd fseek	buff recsize +  recsize fd fgets drop
	pos2 fd fseek	buff		recsize fd fputs
	pos1 fd fseek	buff recsize +	recsize fd fputs
	descriptor clear-all-idx ;
: new-records		\ ( cnt descriptor -- )
	dup >db-fd @ 0 locals| buff fd descriptor cnt |
	descriptor ?db-open descriptor recsize# ?allocate is buff
	buff descriptor recsize# erase  fd fsize fd fseek
	cnt 0 ?do buff descriptor recsize# fd fputs 1 descriptor >db-recs +! loop
	buff ?free descriptor clear-all-idx ;
: get-item		\ ( record item addr descriptor -- addr )
	dup >db-fd @ 0 0 locals| isize ioff fd descriptor addr item record |
	item descriptor item>pos 2@ is isize  is ioff
	record descriptor >db-recoff ioff + fd fseek addr isize fd fgets drop  addr ;
: store-item		\ ( record item addr descriptor -- )		\
	dup >db-fd @ 0 0 locals| isize ioff fd descriptor addr item record |
	item descriptor item>pos 2@ is isize  is ioff
	record descriptor >db-recoff ioff + fd fseek addr isize fd fputs
	item descriptor idx-invalid ;

hidden definitions
string-array item-types  ," "   ," /int"   ," /dint"   ," /string"  end-string-array
: get-item-type/size
	blword astring "move locals| str | 0
	str 1 item-types "=	if h# 01000004 + exit			then
	str 2 item-types "=	if h# 02000008 + exit			then
	str 3 item-types "=	if h# 03000001 +  blword str "copy	then
	str count evaluate + ;
: fcomp-lit	idxbuff1 @ idxbuff2 @ > ;
: fcomp-dlit	idxbuff1 2@ idxbuff2 2@ d> ;
: fcomp-str	idxbuff1 count idxbuff2 count caps on compare caps off 0> ;
: idx>file	\ ( addr item descriptor -- addr ) sets the filename in addr according to item
	astring locals| str descriptor item addr | push-decimal
	descriptor >db-definer @ cell+ addr "copy   item (.) str pack  addr "cat
	pop-base addr ;
: make-idx	\ ( item descriptor -- )
	astring -rot idx>file h# ffd make&type ;
: write-dummyidx \ ( item descriptor -- )
	astring locals| buff descriptor item |
	buff 4 erase  buff 4 idxfd fputs
	descriptor records# 0 ?do i buff !  buff 4 idxfd fputs loop ;
: fidx-copy	( i j -- )
	swap cells idxfd fseek	tempidx /cell idxfd fgets drop
	cells idxfd fseek	tempidx /cell idxfd fputs ;
: fidx-test	( i j -- flag )
	locals| j i |
	i cells idxfd fseek  tempidx /cell idxfd fgets drop tempidx @ idxrecsize * idxoff + datafd fseek
	idxbuff1 idxitemsize datafd fgets drop
	j cells idxfd fseek  tempidx /cell idxfd fgets drop tempidx @ idxrecsize * idxoff + datafd fseek
	idxbuff2 idxitemsize datafd fgets drop
	comp-op ;

: bcomp-lit	>r @  r> @ > ;
: bcomp-dlit	>r 2@ r> 2@ d> ;
: bcomp-str	>r count r> count caps on compare caps off 0> ;
: bidx-copy	( i j -- )
	>r cells idxbuffer + @  idxbuffer r> cells+ ! ; 
: bidx-test	( i j -- flag )
	>r cells idxbuffer + @	idxrecsize * idxdatabuffer +
	idxbuffer r> cells+ @	idxrecsize * idxdatabuffer +	
	comp-op ;

: sort-index	( item descriptor -- )
	astring 0 0 0 locals| ramflag? idxrecs idxtype buff descriptor item | descriptor ?db-open
	descriptor >db-fd @ is datafd
	item descriptor get-item-info is idxtype  is idxitemsize  is idxoff  drop
	descriptor records# is idxrecs
	descriptor recsize# is idxrecsize
	idxitemsize ?allocate is idxbuff1  idxitemsize ?allocate is idxbuff2
	item descriptor make-idx
	buff item descriptor idx>file ?fopen is idxfd
	available  idxrecs cells  idxrecs idxrecsize * + 5000 cells + > dup  is ramflag?
	if	idxrecs 1+ cells ?allocate is idxbuffer
		idxbuffer idxrecs 1+ cells erase
		idxrecs 0 ?do i  idxbuffer i 1+ cells+ ! loop
		idxrecs idxrecsize * ?allocate is idxdatabuffer
		idxoff datafd fseek  idxdatabuffer  idxrecs idxrecsize * idxoff -   datafd fgets drop
		['] bidx-copy is rec-copy
		['] bidx-test is rec-test
		idxtype
		case	1 of ['] bcomp-lit	endof
			2 of ['] bcomp-dlit	endof
			3 of ['] bcomp-str	endof
		endcase is comp-op 
	else	item descriptor write-dummyidx
		['] fidx-copy is rec-copy
		['] fidx-test is rec-test
		idxtype
		case	1 of ['] fcomp-lit	endof
			2 of ['] fcomp-dlit	endof
			3 of ['] fcomp-str	endof
		endcase is comp-op 
	then
	descriptor records# heap-sort
	item descriptor idx-valid
	idxbuff2 ?free  idxbuff1 ?free
	ramflag?
	if	0 idxfd fseek  idxbuffer idxrecs 1+ cells idxfd fputs	
		idxbuffer ?free  idxdatabuffer ?free
	then	idxfd fclose ;
: use-index	( item descriptor -- )
	astring locals| buff descriptor item |
	item descriptor item>pos 40 + @ 0= abort" not an indeable item type"
	item descriptor >db-indexes bittest 0=
	if	item descriptor >db-idxfds @ ?dup if fclose then
		item descriptor	sort-index		
		buff item descriptor idx>file ?fopen  item descriptor >db-idxfds !
	else	item descriptor >db-idxfds @ 0=
		if	buff item descriptor idx>file ?fopen  item descriptor >db-idxfds !
		then
	then ;
forth definitions
: index->record		\ ( idx item descriptor -- record )
	0 locals| fd descriptor item idx |
	descriptor ?db-open  item descriptor use-index	
	item descriptor >db-idxfds @ is fd
	idx 1+ cells fd fseek  tempidx /cell fd fgets drop tempidx @ ;
hidden definitions
: get-idx-item		\ ( idx item addr descriptor -- addr )
	locals| descriptor addr item idx |
	idx item descriptor index->record  item addr descriptor get-item ;
forth definitions
: search-item	( parameter item descriptor -- index record )
	0 0 0 0 0 astring astring locals| par buff rmid rmax rmin itype fd descriptor item |
	descriptor records# 1- is rmax  0 is rmin
	item descriptor item>pos 40 + @ is itype
	itype 1 =	if par !	['] bcomp-lit is comp-op	then
	itype 2 =	if par 2!	['] bcomp-dlit is comp-op	then
	itype 3 =	if par "copy	['] bcomp-str is comp-op	then
	begin	rmax rmin - 1 >
	while	rmin  rmax rmin	- 2/ + is rmid
		par  rmid item buff descriptor get-idx-item  comp-op
		if rmid is rmin else rmid is rmax then
	repeat	rmin  
	par over item buff descriptor get-idx-item comp-op if 1+ then itype 3 = if 1+ then
	item descriptor >db-idxfds @ is fd
	dup 1+ cells fd fseek buff /cell fd fgets drop buff @ ;

hidden definitions
3 action-name searchin
4 action-name indexed
5 actions" obj. Database item"
	action:		\ object ( record addr -- addr )	reads the records item
		dup cell+ @ @ >r @ swap r> get-item ;
	action:		\ to object	( record addr -- )		sets the record item
		dup cell+ @ @ >r @ swap r> store-item ;
	action:		\ addr object ( -- objadr )
		??cr ." Warning: addr of database item requested" ;
	action:		\ searchin object ( parameter -- ixd record )
		dup cell+ @ @ >r @ r> search-item ;
	action:		\ indexed object ( index -- record )
		dup @ swap cell+ @ @ index->record ;
: "item-object	\ ( itemoff database item name -- )
		"create , a, , use-actions ;

: (database	\ ( strlast size/typelast ... str1 size/type1 str0 size0 items databasename -- )
	astring astring astring astring 0 0 0 0 locals| db #ent #rec fd buff dname path fname name items |
	p" <Forthmacs$Dir>.data." path "copy name path "cat  path ?(md
	p" ." path "cat  buff 256 erase
	path fname "copy  p" data" fname "cat  fname file-exists? 0=		if fname h# ffd make&type then
	path fname "copy  p" struct" fname "cat  fname file-exists? 0=	dup >r  if fname h# ffd make&type then
	fname ?fopen is fd r> if buff 256 fd fputs buff 256 fd fputs then  512 fd fseek
	p" Data:" dname "copy  name dname "cat
	dname "header pointer-cf  here dup is db a, path count ", align
	items 0
	do	buff 44 erase		( string size )
		name dname "copy  p" :" dname "cat  over dname "cat
		#rec db #ent dname "item-object	( string size/type )
		dup 24 rshift buff 40 + ! h# 00ffffff and
		dup buff !  #rec buff cell+ !  #rec + is #rec
		buff 2 cells+ "copy  buff 44 fd fputs  #ent 1+ is #ent
	loop
	#rec aligned #ent buff  2! 0 fd fseek  buff 8 fd fputs  fd fclose ;

forth definitions

action-compiler: searchin
action-compiler: indexed
: open-database		\ ( definition -- )
	astring astring 0 0 0 locals| fd hsize descriptor fname buff definition |
	definition ?db-closed
	definition cell+ fname "copy  p" struct" fname "cat
	fname ?fopen is fd  fd fsize is hsize  hsize ?allocate is descriptor
	descriptor hsize fd fgets drop fd fclose
	descriptor 256 + 256 erase
	definition descriptor >db-definer !  descriptor definition a!
	definition cell+ fname "copy  p" data" fname "cat
	fname ?fopen descriptor >db-fd ! ;
: close-database		 	\ ( descriptor -- )
	astring 0 locals| fd fname descriptor |
	descriptor ?db-open
	descriptor >db-definer @ dup a!
	descriptor >db-fd @ fclose
	descriptor 256 + 256 bounds do i @ ?dup if fclose then /cell +loop
	descriptor >db-definer @  cell+ fname "copy  p" struct" fname "cat
	fname ?fopen is fd
	descriptor  fd fsize fd fputs  fd fclose
	descriptor ?free ;

: <database	\ name ( -- )
	blword astring "move astring 0 0 locals| buff items linebuf dname |
	ps-size /cell /  depth -  20 cells -	dup
	40 * ?allocate is buff	( maxitems )
	0 do	blword buff i 40 * + cell+ "move p" database>" "= ?leave
		get-item-type/size buff i 40 * + !
		items 1+ is items
	loop
	items 0 ?do buff items i - 1- 40 * + dup cell+ swap @ loop
	items dname (database buff ?free ;

: .database		\ ( descriptor -- )
	locals| descriptor |
	descriptor ?db-open
	??cr	." Database Path:       "  descriptor >db-definer @ cell+ ".
	cr	." Database Descriptor: "  descriptor 1 .r
	cr 	descriptor records# 		9 .r ."  records"
	cr 	descriptor recsize# 		9 .r ."  recordsize"
	cr	descriptor items#		9 .r ."  items per record"
	cr	descriptor >db-definer @	9 .r ."  database definition addr"
	cr	10 spaces ." Indexed: " 	64 0 do i descriptor 5 cells+ bittest abs 1 .r loop
	cr	10 spaces ." Itemlist:"
	descriptor items# 0
	do ??cr 10 spaces i descriptor get-item-info item-types ". 18 to-column  4 .r space drop ". loop ;

only forth also definitions
fexit


All database accesses use a common database descriptor, defined by this structure:
0	number of items per record
4	size of one record
8	0 -- later is fd
12	number of records
16	pointer to parameter field of database definition
20-27	holds bitmap for indexed items
28-255	reserved
256-511	array of file fds for indexing

512-555	first item description
each:	0	size of item in address-units
	4	relative position in record in address-units
	8 - 39	string representing name of item
	40-43	type of item
556-	next item description
...


Databases are declared with <database command using this syntax
<database name
	/cell	Entry0
	size    Entryx
database>
Please remember: NO comments are allowed within this declaration

a database declaration defines a word
  Database:name ( -- addr )
addr points to itself when a database is closes, otherwise it points to the
database descriptor buffer.

After the database name is defined, pairs of words are read, the first giving the
the items name. The second may just be a number giving the items size in bytes, these items have
an undefined type and may not be indexed.
Typed - and therefore indexable - items can be defined with these descriptors
/int		cell wide signed integer
/dint		double wide signed integer
/string xx      ISO counted string with the maximum size of xx characters

end-string-array

Also the is an object defined for each item called name:Entryx
Two operations on this object are possible:
name:Entryx	( recordx -- addr )
	reads the items data from recordx into a buffer at addr
to name:Entryx	( addr recordx -- )
	writes the data found at addr into the items data field of recordx

a database item object has the following data structure
	0 item#
	4 database descriptor
	8 item-offset in record
	12 item size

comment on indexing:
	sort-index	( item descriptor -- )
generates a sorted index file on one item for a database. This works in
two possible modes
	1 - the amount of memory available allows the complete database plus
	indexfile to be loaded into memory. This is >100 times faster than
	2 - in this mode, all comparing and swapping is done in the file.
	This allowes BIG databases to be indexed but it is very slow.
The used algorithm is heap sort in both cases, bubble sort is faster with
mode two but slower than mode 1. As databases with 5000 records can be sorted
in < 1 second for mode 2, heap sort has been choosen for best interactivity.
