#!/usr/bin/perl

use strict;

use FreePWING::FPWUtils::FPWParser;
use FileHandle;
use Compress::Raw::Zlib;

use vars qw($ldoce5dir);
use vars qw($maindir $exdir $thesdir $collodir $phrasedir $worddir $prondir);
use vars qw($wdfamdir $etymoldir $vformsdir);
use vars qw($sideex $sidethes $sidecollo $sidephras $sideword $wdfamily $wdorigin $verbform);
use vars qw($indent $italic $bold $emphasize $link $phrasalverb);
use vars qw($condsearch $condskipword);
use vars qw($image_type $sound_type);
use vars qw($image_output $sfx_output $exa_output $gb_output $us_output);
use vars qw($debug);

require './ldoce5-fpw.conf';
require './parse.pl';

## FreePWING writer objects shared by every routine in this file.  They are
## handed to initialize_fpwparser() by reference in MAIN.
my $fpwtext;
my $fpwheading;
my $fpwword2;
my $fpwkeyword;

## Progress counters, reset by parse_content() for each directory.
my $ncurrent;
my $ncontents;

MAIN: {
	## The keyword writer is only passed along when conditional search is
	## switched on in the configuration.
	my @writers = ('text'    => \$fpwtext,
				   'heading' => \$fpwheading,
				   'word2'   => \$fpwword2);
	push(@writers, 'keyword' => \$fpwkeyword) if $condsearch;

	initialize_fpwparser(@writers);

	## Sections whose configuration value is 2 are pre-scanned first.
	## Each row: [ directory, entry count, "selected" flag ].
	foreach my $job ([$exdir,     50694, $sideex    == 2],
					 [$thesdir,    6128, $sidethes  == 2],
					 [$collodir,  34748, $sidecollo == 2],
					 [$phrasedir, 11974, $sidephras == 2],
					 [$worddir,     215, $sideword  == 2],
					 [$wdfamdir,    970, $wdfamily  == 2],
					 [$etymoldir, 14121, $wdorigin  == 2],
					 [$vformsdir,  5416, $verbform  == 2]) {
		my ($dir, $count, $selected) = @$job;
		prescan_content($dir, $count) if $selected;
	}

	## The main dictionary body is always converted.
	parse_content($maindir, 51604);

	## Sections whose configuration value is 1 are converted as well.
	## NOTE(review): the etymology section is converted for any positive
	## value (so also for 2, after its prescan) -- confirm this is intended.
	foreach my $job ([$exdir,     50694, $sideex    == 1],
					 [$thesdir,    6128, $sidethes  == 1],
					 [$collodir,  34748, $sidecollo == 1],
					 [$phrasedir, 11974, $sidephras == 1],
					 [$worddir,     215, $sideword  == 1],
					 [$wdfamdir,    970, $wdfamily  == 1],
					 [$etymoldir, 14121, $wdorigin  >  0],
					 [$vformsdir,  5416, $verbform  == 1]) {
		my ($dir, $count, $selected) = @$job;
		parse_content($dir, $count) if $selected;
	}

	finalize_fpwparser(@writers);
}

exit(0);

## Read one LDOCE5 content directory and register every entry it holds.
##
##   $path  - content directory, appended to the global $ldoce5dir
##   $total - expected number of entries; used only for the progress display
##
## CONTENT.tda.tdz is read as a sequence of 8-byte records; the second
## 32-bit little-endian word of each record is taken as the byte length of
## the next compressed chunk in CONTENT.tda.  Every inflated chunk is split
## on NUL bytes and each piece is passed to get_content(), whose result is
## written through register_entry() and register_text().
##
## Dies when a file cannot be opened, when CONTENT.tda is shorter than the
## index says, or when a chunk cannot be inflated.
sub parse_content {
	my ($path, $total) = @_;
	
	## Build the directory name and make sure it ends with a slash.
	my $srcdir = $ldoce5dir.$path;
	$srcdir =~ s/^(.+?)\/?$/$1\//;
	
	my $content_filename = $srcdir.'CONTENT.tda';
	my $content_index_filename = $srcdir.'CONTENT.tda.tdz';
	
	my $content_handle = FileHandle->new;
	if (!$content_handle->open($content_filename, 'r')) {
		die "$content_filename: $^E\n";
	}
	binmode $content_handle;
	my $content_index_handle = FileHandle->new;
	if (!$content_index_handle->open($content_index_filename, 'r')) {
		die "$content_index_filename: $^E\n";
	}
	binmode $content_index_handle;
	
	## (disabled) switch the text writer to a new context
	#$fpwtext->new_context() || die $fpwtext->error_message() . "\n";
	
	$ncurrent  = 0;
	$ncontents = $total;
	for (;;) {
		my $size;
		my $tmp;
		my $zipped_contents;
		my $contents;
		
		## A short or failed read of the index ends the loop.
		if (read($content_index_handle, $tmp, 8) != 8) {
			last;
		}
		($size) = unpack("x4V", $tmp);
		if (read($content_handle, $zipped_contents, $size) != $size) {
			die "File reading error: $content_filename\n";
		}
		my ($inflater, $status) = Compress::Raw::Zlib::Inflate->new();
		if ($status != Z_OK) {
			die "Failed to initialize inflater\n";
		}
		## inflate() reports errors through its return value; it has to be
		## captured here, otherwise the check below would only re-test the
		## constructor status and corrupt chunks would pass unnoticed.
		$status = $inflater->inflate($zipped_contents, $contents);
		if ($status != Z_OK && $status != Z_STREAM_END) {
			die "Failed to inflate\n";
		}
		foreach my $content (split(/\0+/, $contents)) {
			my ($entrytag, $headword, $headword2, $searchword, $text) = get_content($content);
			if ($debug) {
				print "-------------------------------------------------\n";
				print "tag: $entrytag\n";
				print "hwd: $headword\n";
				print "hw2: $headword2\n";
				print "swd: $searchword\n";
				#print "txt: $text\n";
			}
			
			## Register the entry (heading, keyword and search word).
			register_entry($entrytag, $headword, $headword2, $searchword);
			
			## Write the body text.
			register_text(\$fpwtext, $text);
			
			## Separate consecutive entries with two newlines.
			$fpwtext->add_newline();
			$fpwtext->add_newline();
			
			## Progress line, rewritten in place; autoflush is switched on
			## only around this printf.
			$ncurrent++;
			$| = 1;
			if ($headword eq "") {
				printf("%-78s\r", "[$ncurrent/$ncontents:$entrytag]");
			} else {
				printf("%-78s\r", "[$ncurrent/$ncontents:$headword]");
			}
			$| = 0;
		}
	}
	close $content_handle;
	close $content_index_handle;
	print "\n";
}

## Create the heading, keyword and search-index records for one entry.
##
##   $entrytag   - tag for a new text entry; when empty no new text entry is
##                 started and the text writer's current 'position' is used
##   $headword   - heading text, written to the heading writer
##   $headword2  - text placed between keyword start/end marks in the body
##   $searchword - key added to the word2 index (only when both it and
##                 $headword are non-empty)
##
## Dies with the writer's error message when a writer call fails.
sub register_entry {
	my ($entrytag, $headword, $headword2, $searchword) = @_;

	## Work out where in the body text this entry points to.
	my $body_pos;
	if ($entrytag eq "") {
		$body_pos = $fpwtext->{'position'};
	} else {
		$fpwtext->new_entry() or die $fpwtext->error_message() . "\n";
		$fpwtext->add_entry_tag($entrytag) or die $fpwtext->error_message() . "\n";
		$body_pos = $fpwtext->entry_position();
	}

	## Start a new heading entry and write the heading text.
	$fpwheading->new_entry() or die $fpwheading->error_message() . "\n";
	register_text(\$fpwheading, $headword);
	my $head_pos = $fpwheading->entry_position();

	## Write the keyword part of the body text.
	$fpwtext->add_keyword_start() or die $fpwtext->error_message() . "\n";
	unless ($headword2 eq "" || register_text(\$fpwtext, $headword2)) {
		print "headword2:[$headword2]\n";
		die $fpwtext->error_message() . "\n";
	}
	$fpwtext->add_keyword_end() or die $fpwtext->error_message() . "\n";

	## Add the search word to the index.
	if ($headword ne "" && $searchword ne "") {
		unless ($fpwword2->add_entry($searchword, $head_pos, $body_pos)) {
			print "searchword:[$searchword]\n";
			die $fpwword2->error_message() . "\n";
		}
	}
}

## Register the words of a conditional-search phrase in the keyword index.
##
##   $headword - heading text written for the hit
##   $keyword  - search words separated by single spaces
##
## Words that match $condskipword as a whole are dropped.  The remaining
## words are sorted and each one is added to the keyword index, all pointing
## at one newly created heading entry and at the text writer's current
## 'position'.  Nothing is written when no word remains.
##
## Dies with the writer's error message when a writer call fails.
sub register_condsearch_entry {
	my ($headword, $keyword) = @_;
	
	## Trim surrounding whitespace.  The trailing pattern must be \s+; a
	## bare "s+" would instead erase keywords made only of the letter "s".
	$keyword =~ s/^\s+//;
	$keyword =~ s/\s+$//;

	## Split into words, discarding skip words and empty tokens.
	my @keys;
	foreach my $token (sort(split(/ /, $keyword))) {
		$token =~ s/^($condskipword)$//;
		$token =~ s/^\s+//;
		$token =~ s/\s+$//;
		if ($token ne "") {
			push @keys, $token;
		}
	}
	
	## Register the surviving words.
	if (@keys) {
		## No new text entry is started; the hits point at the current
		## position of the text writer.
		my $text_position = $fpwtext->{'position'};
		
		## Start a new heading entry and write the heading text.
		$fpwheading->new_entry() || die $fpwheading->error_message() . "\n";
		register_text(\$fpwheading, $headword);
		my $heading_position = $fpwheading->entry_position();

		foreach my $token (@keys) {
			if (!$fpwkeyword->add_entry($token, $heading_position, $text_position)) {
				print "keyword($token): [$keyword]\n";
				die $fpwkeyword->error_message() . "\n";
			}
		}
	}
}

## Translate one marked-up string into calls on a FreePWING writer.
##
##   $fpwlocal - reference to the scalar that holds the writer object
##   $str      - text containing <...> tags and {hc-...}/{fc-...} codes
##
## The string is normalised, cut into tokens (plain text, one tag, or one
## user-character code) and every token is dispatched in turn.  Tags whose
## feature is switched off in the configuration, and tags that are not
## recognised at all, are silently dropped.
##
## Returns 1.  Dies with the writer's error message when a writer call fails.
sub register_text {
	my($fpwlocal, $str) = @_;
	my $out = $$fpwlocal;

	## Common failure path: print the optional diagnostic, then abort with
	## the writer's own error message.
	my $fail = sub {
		print @_ if @_;
		die $out->error_message() . "\n";
	};

	## Put line breaks that follow indent markers in front of them.
	while($str =~ s/(<tab[2-6]\/>)+(<br\/>|<br2\/>)+/$2$1/g){}
	## Collapse runs of identical indent markers.
	$str =~ s/(<tab2\/>)+/<tab2\/>/g;
	$str =~ s/(<tab3\/>)+/<tab3\/>/g;
	$str =~ s/(<tab4\/>)+/<tab4\/>/g;
	$str =~ s/(<tab5\/>)+/<tab5\/>/g;
	$str =~ s/(<tab6\/>)+/<tab6\/>/g;
	## Collapse runs of line breaks; a single/double pair becomes a double.
	$str =~ s/(<br\/>)+/<br\/>/g;
	$str =~ s/(<br\/><br2\/>|<br2\/><br\/>)/<br2\/>/g;
	$str =~ s/(<br2\/>)+/<br2\/>/g;
	## Move a break that directly follows <b> in front of it.
	$str =~ s/<b><br\/>/<br\/><b>/g;
	## Drop line breaks at the very end.
	$str =~ s/(<br\/>|<br2\/>)+$//g;
	## Drop {hc-u-20} after "!!" and collapse doubled {hc-u-20} codes.
	$str =~ s/!!\{hc-u-20\}/!!/g;
	$str =~ s/\{hc-u-20\}\{hc-u-20\}/\{hc-u-20\}/g;

	## Tokenise: repeatedly take the first token and continue with the rest.
	my @pieces;
	while ($str =~ /(([^<>\{\}]+)|(<.+?>)|(\{[hf]c-.+?\}))(.*)/) {
		push(@pieces, $1);
		$str = $5;
	}
	if ($debug) {
		print join(" | ", @pieces), "\n";
	}

	## The order of the tests below matters: the generic <...> test near
	## the end swallows every tag that was not handled before it.
	foreach my $text (@pieces) {
		if ($text =~ /<sub>/) {
			$out->add_subscript_start() or $fail->();
		}
		elsif ($text =~ /<\/sub>/) {
			$out->add_subscript_end() or $fail->();
		}
		elsif ($text =~ /<sup>/) {
			$out->add_superscript_start() or $fail->();
		}
		elsif ($text =~ /<\/sup>/) {
			$out->add_superscript_end() or $fail->();
		}
		elsif ($text =~ /<b>/ && $bold) {
			$out->add_font_start('bold') or $fail->();
		}
		elsif ($text =~ /<\/b>/ && $bold) {
			$out->add_font_end() or $fail->();
		}
		elsif ($text =~ /<i>/ && $italic) {
			$out->add_font_start('italic') or $fail->();
		}
		elsif ($text =~ /<\/i>/ && $italic) {
			$out->add_font_end() or $fail->();
		}
		elsif ($text =~ /<em>/ && $emphasize) {
			$out->add_emphasis_start() or $fail->();
		}
		elsif ($text =~ /<\/em>/ && $emphasize) {
			$out->add_emphasis_end() or $fail->();
		}
		elsif ($text =~ /<nw>/) {
			$out->add_nowrap_start() or $fail->();
		}
		elsif ($text =~ /<\/nw>/) {
			$out->add_nowrap_end() or $fail->();
		}
		elsif ($text =~ /<br\/>/) {
			$out->add_newline() or $fail->("[$text]\n");
		}
		elsif ($text =~ /<br2\/>/) {
			## Double break: two newlines, stopping at the first failure.
			($out->add_newline() && $out->add_newline())
				or $fail->("[$text]\n");
		}
		elsif ($text =~ /<tab([2-6])\/>/ && $indent) {
			$out->add_indent_level($1) or $fail->("[$text]\n");
		}
		elsif ($text =~ /<a name=\"(.+)\"\/>/ && $link) {
			$out->add_entry_tag($1) or $fail->("[$text]\n");
		}
		elsif ($text =~ /<a href>/ && $link) {
			$out->add_reference_start() or $fail->();
		}
		elsif ($text =~ /<\/a href=\"(.+)\">/ && $link) {
			## The link target is carried by the closing tag.
			$out->add_reference_end($1) or $fail->();
		}
		elsif ($text =~ /<entry tag=\"(.*)\" head=\"(.*)\"\/>/ && $phrasalverb) {
			## Extra index entry pointing at the current text position.
			register_entry("", $2, "", $1);
		}
		elsif ($text =~ /<entry cond=\"(.*)\" head=\"(.*)\"\/>/ && $condsearch) {
			## Conditional-search entry.
			register_condsearch_entry($2, $1);
		}
		elsif ($text =~ /<bmp tag=\"(.*)\" caption=\"(.*)\"\/>/ && $image_type) {
			## Only reference pictures that actually exist on disk.
			if ( ! -f $image_output."/PIC_".$1.".jpg") {
				print "NOT FOUND: $text [$1], [$2]\n";
			}
			else {
				$out->add_inline_jpeg_graphic_start("PIC_".$1)
					or $fail->("$text [$1], [$2]\n");
				$out->add_inline_jpeg_graphic_end()
					or $fail->("$text [$1], [$2]\n");
			}
		}
		elsif ($text =~ /<wav tag=\"(.*)\" caption=\"(.*)\"\/>/ && $sound_type) {
			## Directory and visible label for each known sound variant;
			## anything else is looked up in "." without a label.
			my %variant = (
				"GB"  => [$gb_output,  "[GB]"],
				"US"  => [$us_output,  "[US]"],
				"EXA" => [$exa_output, ""],
				"SFX" => [$sfx_output, ""],
			);
			my ($path, $label) = exists $variant{$2}
				? @{$variant{$2}}
				: (".", "");

			## A missing file is reported and replaced by the silent clip.
			my $sound;
			if ( -f $path."/".$2."_".$1.".wav" ) {
				$sound = $2."_".$1;
			} else {
				print "NOT FOUND: $text [$1], [$2]\n";
				$sound = "SFX_SILENT";
				$label = "";
			}
			$out->add_sound_start($sound) or $fail->("$text [$1], [$2]\n");
			$out->add_text($label)        or $fail->("$text [$1], [$2]\n");
			$out->add_sound_end()         or $fail->("$text [$1], [$2]\n");
		}
		elsif ($text =~ /\{fc-(.+?)\}/) {
			$out->add_full_user_character($1) or $fail->("$text [$1]\n");
		}
		elsif ($text =~ /\{hc-(.+?)\}/) {
			$out->add_half_user_character($1) or $fail->("$text [$1]\n");
		}
		elsif ($text =~ /<.+?>/){
			## Unknown or disabled tag: nothing is written.
		}
		else {
			## Plain text: undo the entity escapes before writing.
			$text =~ s/&lt;/</g;
			$text =~ s/&gt;/>/g;
			$text =~ s/&quot;/\"/g;
			unless ($out->add_text($text)) {
				print "text:[$text]\n";
				print_debug($text);
				die $out->error_message() . "\n";
			}
		}
	}

    return 1;
}
1;
