# -------------------- autocode.awk vα.21 ----------------------
	#	(C) 1996.06.05 - 1996.06.22 田中重人
	# -------------------------------------------------------------
	# 文章データ自動コーディング
	# -------------------------------------------------------------
	# オプション変数(=既定値)
	#	L=8		切り出し文字列の前後の長さ
	#	B=1		0/1で出力 (or 出現回数)
	#	V=0		コーディングルールを表示
	# -------------------------------------------------------------
	# 内部変数:
	#	commentmark	この記号から行末までをコメントとする
	#	id	ID を格納してあるフィールド番号
	#	field	検索対象フィールド番号の並び
	#	keys		検索対象フィールド番号の配列
	#	outfile		出力先ファイル名
	#	RE[]		検索対象文字列
	#	code[re]	re 番目の検索対象文字列に対するコード文字列
	#	NotLook0[re,i]	コード回避文字列 (前)
	#	NotLook2[re,i]	コード回避文字列 (後)
	# -------------------------------------------------------------

# Start-up: select paragraph-mode input, install default settings and print
# the program banner.
BEGIN   {
	RS = "" ;		# empty RS: records are separated by blank lines
	FS = "\n" ;		# every line of a record is one field
	OFS= FS ;
	ORS= "\n" ;
	id = 1;			# default ID field number
	keys[1] =0;		# default search target: field 0, i.e. the whole record
	commentmark = ";" ;
	# -v assignments are made before BEGIN runs, so only install the option
	# defaults when the user has not supplied a value already.
	if( ""==L )	L=8 ;
	if( ""==B )	B=1;
	defaultoutfile="/dev/stderr" ;

	print "Autocode.awk (alpha.21) by TANAKA Sigeto.\n"
}

# The file that supplies the very first record is remembered as the rule file.
NR == 1 {
	rulefile = FILENAME
}

	# -------------------------------------------------------------
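	#	Avoid-pattern lookup: int NotLook02( expression index, text before the match, text after the match )
	#		Returns:	0 (no avoid-pattern matched)
	#			1 (a "before" avoid-pattern matched)
	#			2 (an "after" avoid-pattern matched)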
	#		
	# -------------------------------------------------------------


# NotLook02( i, prefix, postfix ) -- check the text around a match against
# the avoid-patterns registered for expression number i.
#	i	index of the search expression
#	prefix	text to test against NotLook0[i,1], NotLook0[i,2], ...
#	postfix	text to test against NotLook2[i,1], NotLook2[i,2], ...
#	j	local loop counter (not passed by callers)
# Returns 1 if a NotLook0 pattern matches prefix, otherwise 2 if a NotLook2
# pattern matches postfix, otherwise 0.
function NotLook02( i, prefix, postfix, 	j ) {
	j = 1;
	while( (i,j) in NotLook0 ) {
		if( prefix ~ NotLook0[i,j] )
			return 1;
		j++;
	}

	j = 1;
	while( (i,j) in NotLook2 ) {
		if( postfix ~ NotLook2[i,j] )
			return 2;
		j++;
	}

	return 0;
}
	
	
# Runs on the first record of every data file (any file other than the rule
# file): switches output to tab-separated lines, reports the settings read
# from the rule file, creates the output file and prints the column layout.
( FILENAME!=rulefile && 1==FNR ) {	# coding set-up (second and later input files)
	OFS= "\t" ;
	ORS= "\n" ;
	if( 0!=V ) {
		# Dump every expression with its code and avoid-patterns; V is
		# decremented afterwards, so V=1 shows the table once.
		print "Table of expressions and linkages:"
		for( i=1; i<=_re; ++i ){
			printf( "\n%d:%s\tcode=%s\n" , i, RE[i], code[i] ) ;
			for( j=1; (i,j)in NotLook0; ++j )	printf( "\t<%s" , NotLook0[i,j] ) ;
			for( j=1; (i,j)in NotLook2; ++j )	printf( "\t>%s" , NotLook2[i,j] ) ;
		}
		print "\n" ;
		--V;
	}
	printf( "%d expressions were read from file %s .\n" , _re, rulefile ) ;

	printf( "\nInput Files = " ) ;
	# Valid ARGV indices end at ARGC-1.  The names go through "%s" so that a
	# "%" inside a file name is not interpreted as a format directive.
	for( i=2; i<ARGC; ++i )	printf( "%s " , ARGV[i] ) ;

	if( ""==outfile )		outfile = defaultoutfile ;
	outfile  = trim( outfile )
	print "\nOutput File = " outfile ;
	printf( "" ) > outfile ;

	print "ID  Field = " id;

	# split() deletes every element of its target array, so splitting an
	# empty field list would throw away the default key (field 0).  Only
	# replace the key list when a #field directive actually supplied one.
	if( ""!=field )	split( field , keys, "[^0-9]+" ) ;
	printf( "Key Fields= " ) ;
	for( i=1; i in keys; ++i )	printf( "%s " , keys[i] ) ;
	
	print "" ;

	printf( "\nOutput design for file %s (field:code)\n" , outfile ) ;
	printf( "ID\t" );
	# One column per key field and per run of consecutive expressions that
	# share a code -- the same grouping the coding rule uses when it writes
	# the data.  The counter is reset so that each data file reports the
	# right number of columns.
	x = 0;
	for( k=1; k in keys; ++k ){
		for( i=1; i in RE; ++i )	if( 1==i || code[i-1] != code[i] ) {
			printf( "%d:%s\t" , keys[k], code[i] );
			++x;
		}
	}
	print "\n" (x+1) " columns are put into file " outfile " .\n" ;

	print "Matched cases in file " FILENAME " (ID, field, code, text):";

	# Anchor the avoid-patterns: "before" patterns must end where the match
	# starts, "after" patterns must begin where it ends.  This rule fires
	# once per data file, so the anchors are added on the first pass only.
	if( !_anchored ) {
		for( s in NotLook0 )	NotLook0[s] = NotLook0[s] "$" ;
		for( s in NotLook2 )	NotLook2[s] = "^" NotLook2[s] ;
		_anchored = 1;
	}
}

# Runs on every record of a data file.  Writes one line to outfile: the ID
# field followed, for each key field, by one number per group of consecutive
# expressions sharing a code.  Every accepted match is also printed to
# standard output as ID, field number, code and the match with context.
( FILENAME!=rulefile ) {	# coding (second and later input files)
	printf( "%s" , $id ) >> outfile ;
	++case;
	for( k=1; k in keys; ++k ){
		for( i=1; i in RE; ++i ) {
			# Look[0] = text already scanned, Look[1] = current match,
			# Look[2] = text still to be scanned
			Look[2] = $keys[k];
			Look[0] = "" ;
			found[i]=0;
			while( match( Look[2], RE[i] ) ){
				# An expression that matches the empty string never
				# consumes any text, so the scan could not advance and
				# the loop would never end; stop scanning instead.
				if( RLENGTH < 1 )	break;

				Look[0] = Look[0] substr( Look[2], 1, RSTART-1 ) ;
				Look[1] = substr( Look[2], RSTART, RLENGTH ) ;
				Look[2] = substr( Look[2], RSTART+RLENGTH ) ;

				# Count the match only when no avoid-pattern applies
				if( 0==NotLook02( i, Look[0], Look[2] ) ) {
					++found[i] ;
					# jsubstr/jlength are not defined in this file --
					# presumably multibyte-aware built-ins of the awk
					# this script targets; TODO confirm.
					# Show up to L characters of context on either side.
					prefix = jsubstr( Look[0], jlength(Look[0])+1-L ) ;
					postfix= jsubstr( Look[2], 1, L ) ;
					print $id, keys[k], code[i], prefix " [" Look[1] "] " postfix ;
				}

				# The matched text becomes part of the scanned prefix
				Look[0] = Look[0] Look[1]
			}
		}

		# Fold the per-expression counts into one value per code group;
		# a value is written each time the code changes and once at the end.
		x=0;
		_code = code[1] ;
		for( i=1; i in RE; ++i ) {
			if( _code != code[i] ) {
				printf( " %d" , x ) >> outfile ;
				x = 0 ;
				_code = code[i] ;
			}

			if( 0==B )	x += found[i] ;		# B=0: number of matches
			else x = ( x || found[i] );		# otherwise: 0/1 flag
		}
		printf( " %d" , x ) >> outfile ;
	}
	print "" >> outfile ;
}

	# -------------------------------------------------------------
	#	ルールファイルの読込
	# -------------------------------------------------------------

# trim( s ) -- return s with leading and trailing blanks and tabs removed.
#	s	string to clean up
#	b, e	locals: position of the first non-blank character and of the
#		start of the trailing blank run
function trim( s, 	b, e ) {
	if( match( s, /[^ \t]/ ) )
		b = RSTART;
	else
		b = 1;

	if( match( s, /[ \t]*$/ ) )
		e = RSTART;
	else
		e = length(s);

	return substr( s, b, e-b );
}

# nocomment( s ) -- return s cut off just before the first occurrence of the
# global commentmark; s is returned unchanged when the mark does not occur.
#	b	local: position of the comment mark (0 when absent)
function nocomment( s, 	b ) {
	b = index( s, commentmark );
	if( 0==b )
		return s;
	return substr( s, 1, b-1 );
}

# add_ARG( s ) -- append the whitespace-separated words of s to ARGV and
# raise ARGC by the number of words added.
#	a, i	locals: the split words and the loop counter
function add_ARG( s, 	a, i ) {
	split( s, a, " " );

	i = 1;
	while( i in a ) {
		ARGV[ARGC+i-1] = a[i];
		i++;
	}

	# i is now one past the last word, so i-1 words were appended
	ARGC += i-1;
}

# Runs on every record of the rule file.  Each line (field) is stripped of
# its comment and surrounding blanks and then interpreted as a directive,
# a code, an avoid-pattern or a search expression.
( FILENAME==rulefile ) {	# read the coding rules (first input file)
	for( i=1; i<=NF; ++i ) {

		$i = trim( nocomment($i) );

		# blank line
		if( 0==length($i) )
			continue;

		# "#id": number of the field holding the ID
		if( $i~/^\#id/ ) {
			sub( /^\#id[ \t]*/, "", $i );
			id = $i ;
			continue;
		}

		# "#infile": additional files to process
		if( $i~/^\#infile/ ) {
			sub( /^\#infile[ \t]*/, "", $i );
			add_ARG( $i ) ;
			continue;
		}

		# "#field": list of field numbers to search
		if( $i~/^#field/ ) {
			sub( /^#field[ \t]*/, "", $i );
			field = $i ;
			continue;
		}

		# "#outfile": where the coded data is written
		if( $i~/^#outfile/ ) {
			sub( /^#outfile[ \t]*/, "", $i );
			outfile = $i ;
			continue;
		}

		# "[...": code given to the expressions that follow
		if( $i~/^\[/ ) {
			_code=$i;
			continue;
		}

		# "<...": "before" avoid-pattern for the latest expression
		if( $i~/^</ ) {
			NotLook0[_re,++_NotLook0] = substr( $i, 2 ) ;
			continue;
		}

		# ">...": "after" avoid-pattern for the latest expression
		if( $i~/^>/ ) {
			NotLook2[_re,++_NotLook2] = substr( $i, 2 ) ;
			continue;
		}

		# any other line starting with "#" is skipped
		if( $i~/^#/ )
			continue;

		# everything else is a new search expression; its avoid-pattern
		# counters start again from zero
		RE[++_re]=$i;
		code[_re] = _code;
		_NotLook0 = _NotLook2 = 0;
	}
}

# After the last input record: report how many data records were coded.
# NOTE(review): "case" is a reserved word in newer gawk releases (switch
# statement); renaming the counter needs a matching change where it is
# incremented.
END {
	printf( "\n%d cases were read.\n", case )
}