#!/usr/bin/perl

use strict;
use XML::DOM;
use FileHandle;
use Compress::Raw::Zlib;

use vars qw($ldoce5dir);
use vars qw($collobox $thesbox $grambox $f2nbox);
use vars qw($sideex $sidethes $sidecollo $sidephras $sideword $wdfamily $wdorigin $verbform);
use vars qw($skipwdorigin $skipverbform);
use vars qw($phrasalverb $gramshortform $geoshortform $condsearch);
use vars qw(%symbol_table %symbol_search_word);
use vars qw($image_type $sound_type);
use vars qw($parse_debug);

# File-level state shared by the subs below.
# Last non-empty text seen in a <td class="COLUMN1"> cell; get_text() reuses
# it when a later COLUMN1 cell is empty.
my $td_col1;
# entry tag -> converted body text, filled by prescan_content().
my %cont_hash;
# entry tag -> display headword (headword2), filled by prescan_content().
my %head_hash;
# Entry tags in the order prescan_content() encountered them.
my @hash_order=();

require './ldoce5-fpw.conf';
require './gaijitbl.pl';

# Debug helper: print the given string as lowercase hex digits wrapped in
# square brackets, followed by a newline.
sub print_debug {
	my ($str) = @_;

	my $hex = unpack("H*", $str);
	print "[" . $hex . "]\n";
}

# Convert one raw XML entry into its output pieces.
#
# Param:   $content - XML text of a single entry (without XML declaration)
# Returns: ($entrytag, $headword, $headword2, $searchword, $text)
#          where the tag comes from get_entrytag(), the head/search words
#          from get_headword() (after symbol replacement) and $text from
#          get_text(), plus the side panel for dictionary entries.
sub get_content {
	my ($content) = @_;

	# Strip all line breaks, then prepend an XML declaration for the parser.
	$content =~ tr/\r\n//d;
	$content = '<?xml version="1.0" encoding="utf-8"?>' . $content;

	my $doc = XML::DOM::Parser->new->parse($content);

	# The first child's name and id attribute determine the entry tag.
	my $root = $doc->getFirstChild;
	my $root_name = $root->getNodeName;
	my $root_id = $root->getAttribute('id');
	my ($kind, $entrytag) = get_entrytag($root_name, $root_id);

	# Headword / search word, with symbols mapped for output.
	my ($headword, $headword2, $searchword) = get_headword($doc, $kind);
	$headword   = replace_symbol($headword);
	$headword2  = replace_symbol($headword2);
	$searchword = replace_symbol_search_word($searchword);

	# Body text; dictionary entries additionally get their side panel.
	my $text = replace_symbol(get_text($doc, $kind));
	$text .= get_sidepanel($doc) if $kind =~ /Entry/;

	$doc->dispose;

	return ($entrytag, $headword, $headword2, $searchword, $text);
}

# Pre-scan every entry stored under $ldoce5dir.$path and cache the result.
#
# CONTENT.tda.tdz is read as a sequence of 8-byte records; bytes 4..7 of each
# record (32-bit little-endian) give the size of the next compressed chunk in
# CONTENT.tda.  Each chunk is inflated, split on NUL bytes into entries and
# handed to get_content().  The converted text goes into %cont_hash, the
# display headword into %head_hash and the tag order into @hash_order.
#
# Params:  $path  - directory, appended to $ldoce5dir
#          $total - entry count shown in the progress line only
# Dies when a file cannot be opened, a chunk is shorter than the index says,
# or zlib reports an error while inflating a chunk.
sub prescan_content {
	my ($path, $total) = @_;

	# Make sure the directory ends with exactly one '/'.
	my $srcdir = $ldoce5dir.$path;
	$srcdir =~ s/^(.+?)\/?$/$1\//;

	my $content_filename = $srcdir.'CONTENT.tda';
	my $content_index_filename = $srcdir.'CONTENT.tda.tdz';

	my $content_handle = FileHandle->new;
	if (!$content_handle->open("$content_filename", 'r')) {
		die "$content_filename: $^E\n";
	}
	binmode $content_handle;
	my $content_index_handle = FileHandle->new;
	if (!$content_index_handle->open("$content_index_filename", 'r')) {
		die "$content_index_filename: $^E\n";
	}
	binmode $content_index_handle;

	# Flush after every write so the "\r" progress line updates immediately.
	local $| = 1;

	my $i = 0;
	for (;;) {
		my $index_record;
		my $got = read($content_index_handle, $index_record, 8);
		# A missing or truncated index record marks the end of the data.
		last if !defined($got) || $got != 8;

		my ($size) = unpack("x4V", $index_record);

		my $zipped_contents;
		$got = read($content_handle, $zipped_contents, $size);
		if (!defined($got) || $got != $size) {
			die "File reading error: $content_filename\n";
		}

		my ($inflater, $status) = Compress::Raw::Zlib::Inflate->new();
		if ($status != Z_OK) {
			die "Failed to initialize inflater\n";
		}

		# Capture the status of inflate() itself; checking the constructor
		# status again would let corrupt chunks pass unnoticed.
		my $contents;
		$status = $inflater->inflate($zipped_contents, $contents);
		if ($status != Z_OK && $status != Z_STREAM_END) {
			die "Failed to inflate\n";
		}

		foreach my $content (split(/\0+/, $contents)) {
			my ($entrytag, $headword, $headword2, $searchword, $text) = get_content($content);
			$cont_hash{$entrytag} = $text;
			$head_hash{$entrytag} = $headword2;
			push @hash_order, $entrytag;
			$i++;
			printf("%-78s\r", "prescan: [$i/$total:$entrytag]");
		}
	}
	close $content_handle;
	close $content_index_handle;
	print "\n";
}

# Derive the (node kind, entry tag) pair for an entry.
#
# Params:  $firstnode_name - name of the entry's first node
#          $entrytag       - its id attribute
# Returns: ($firstnode_name, $entrytag).  The tag gets a resource prefix
#          chosen from the node name, and every character outside
#          [-_a-zA-Z0-9] is replaced by '_'.  An "Entry" node whose id is
#          shorter than 6 characters is reported as kind "etymologies".
sub get_entrytag {
	my ($firstnode_name, $entrytag) = @_;

	if ($firstnode_name =~ /Entry/) {
		# Dictionary entries keep their id unless it is a short one.
		if (length($entrytag) < 6) {
			($firstnode_name, $entrytag) = ("etymologies", "etymologies_".$entrytag);
		}
	}
	else {
		# First matching pattern wins; order mirrors the lookup priority.
		my @prefix_rules = (
			[ qr/examples/, "examples_"      ],
			[ qr/Section/,  "thesaurus_"     ],
			[ qr/Collos/,   "collocations_"  ],
			[ qr/Phrases/,  "phrases_"       ],
			[ qr/category/, "word_sets_"     ],
			[ qr/wf/,       "word_families_" ],
			[ qr/Verb/,     "verb_forms_"    ],
		);
		for my $rule (@prefix_rules) {
			my ($pattern, $prefix) = @$rule;
			next unless $firstnode_name =~ $pattern;
			$entrytag = $prefix.$entrytag;
			last;
		}
	}

	$entrytag =~ tr/-_a-zA-Z0-9/_/c;
	return ($firstnode_name, $entrytag);
}


# Extract the head words of an entry.
#
# Params:  $doc            - parsed XML::DOM document of the entry
#          $firstnode_name - entry kind as returned by get_entrytag()
# Returns: ($headword, $headword2, $searchword); each defaults to "".
#          Only dictionary entries ("Entry") fill $headword and $searchword;
#          the other kinds fill $headword2 only.
sub get_headword {
	my ($doc, $firstnode_name) = @_;
	my ($headword, $headword2, $searchword) = ('', '', '');

	# Value of the first child node of the given element.
	my $leading_text = sub { return $_[0]->getFirstChild->getNodeValue; };

	if ($firstnode_name =~ /Entry/) {
		# Dictionary entry
		my $head = $doc->getElementsByTagName('EAHead')->item(0);
		$headword = $leading_text->($head->getElementsByTagName('hwd')->item(0));

		# The search word is the headword up to the first comma.
		($searchword = $headword) =~ s/,.*$//g;

		# Prefer the hyphenated form for display when there is one.
		my $hyphenations = $doc->getElementsByTagName('HYPHENATION');
		$headword2 = $hyphenations->getLength > 0
			? $leading_text->($hyphenations->item(0))
			: $headword;

		# Homographs: add the part of speech and the homograph number.
		my $homnums = $doc->getElementsByTagName('HOMNUM');
		if ($homnums->getLength > 0) {
			my $pos_list = $head->getElementsByTagName('pos');
			if ($pos_list->getLength > 0 && $pos_list->item(0)->hasChildNodes) {
				$headword .= " (".replace_pos_word($leading_text->($pos_list->item(0))).")";
			}
			$headword2 .= "<sup>".$leading_text->($homnums->item(0))."</sup>";
		}
	}
	elsif ($firstnode_name =~ /examples/) {
		# Side Panel Example Bank
		my $head = $doc->getElementsByTagName('exa-head')->item(0);
		my $pos_list = $head->getElementsByTagName('pos');
		$headword2 = $leading_text->($head->getElementsByTagName('hwd')->item(0));

		if ($pos_list->getLength > 0 && $pos_list->item(0)->hasChildNodes) {
			my $pos = replace_pos_word($leading_text->($pos_list->item(0)));
			$headword2 .= "<i> (".$pos.")</i>";
		}
	}
	elsif ($firstnode_name =~ /Section/) {
		# Side Panel Thesaurus: all section headings joined by " / ".
		$headword2 .= join(" / ",
			map { scalar $_->getFirstChild->getNodeValue }
				$doc->getElementsByTagName('SECHEADING'));
	}
	elsif ($firstnode_name =~ /etymologies/) {
		# Word origin
		my $head = $doc->getElementsByTagName('HEAD')->item(0);
		$headword2 = $leading_text->($head->getElementsByTagName('HWD')->item(0));
		my $homnums = $doc->getElementsByTagName('HOMNUM');
		if ($homnums->getLength > 0) {
			$headword2 .= "<sup>".$leading_text->($homnums->item(0))."</sup>";
		}
	}
	elsif ($firstnode_name =~ /Verb/) {
		# Verb form: the word sits one level deeper inside HWD.
		my $hwd = $doc->getElementsByTagName('HWD')->item(0);
		$headword2 = $leading_text->($hwd->getFirstChild);
		my $spans = $hwd->getElementsByTagName('span');
		if ($spans->getLength > 0) {
			my $suffix = $leading_text->($spans->item(0));
			$headword2 .= $geoshortform ? replace_geo_word($suffix) : $suffix;
		}
	}

	return ($headword, $headword2, $searchword);
}

# Build the side-panel section of a dictionary entry.
#
# Walks the EntryAsset elements below the first SE_EntryAssets element and,
# for each known asset type whose display mode setting is greater than 0,
# appends the output of get_refs().
#
# Param:   $doc - parsed XML::DOM document of the entry
# Returns: "" when there is no SE_EntryAssets element or nothing was
#          produced, otherwise the collected text wrapped as
#          "<br2/><tab2/>...<br2/>".
sub get_sidepanel {
	my ($doc) = @_;

	# Entries without a side panel have nothing to add.
	my $sidepanel = $doc->getElementsByTagName('SE_EntryAssets')->item(0);
	return "" unless defined $sidepanel;

	# asset type => [ label shown to the user, display mode setting ]
	my %panel_for = (
		'other_dictionary_examples'  => [ 'Other dictionary examples',       $sideex    ],
		'corpus_examples'            => [ 'Examples from the corpus',        $sideex    ],
		'thesaurus'                  => [ 'Thesaurus',                       $sidethes  ],
		'activator'                  => [ 'Longman Language Activator',      $sidethes  ],
		'entry_collocations'         => [ 'Collocations from this entry',    $sidecollo ],
		'other_entries_collocations' => [ 'Collocations from other entries', $sidecollo ],
		'corpus_collocations'        => [ 'Collocations from the corpus',    $sidecollo ],
		'word_sets'                  => [ 'Word sets',                       $sideword  ],
		'entry_phrases'              => [ 'Phrases from this entry',         $sidephras ],
		'other_entries_phrases'      => [ 'Phrases from other entries',      $sidephras ],
	);

	my $text = "";
	foreach my $node ($sidepanel->getElementsByTagName('EntryAsset')) {
		next unless $node->getNodeType == ELEMENT_NODE;

		my $panel = $panel_for{ $node->getAttribute('type') };
		next unless defined $panel;	# unknown asset types are ignored

		my ($label, $mode) = @$panel;
		next unless $mode > 0;

		# get_refs() yields nothing when the asset has no usable Ref.
		my $refs = get_refs($label, $node, $mode);
		$text .= $refs if defined $refs;
	}

	return $text if $text eq "";
	return "<br2/><tab2/>".$text."<br2/>";
}

# Render the Ref elements found below a side-panel asset node.
#
# Params:  $label - caption used for the link or the inline block
#          $node  - element whose Ref descendants are rendered
#          $mode  - 1: emit a link per Ref, 2: inline the referenced content
# Returns: the concatenated output, or undef when no Ref produced anything.
sub get_refs {
	my ($label, $node, $mode) = @_;
	my $text;

	for my $refnode ($node->getElementsByTagName('Ref')) {
		next unless $refnode->getNodeType == ELEMENT_NODE;

		my $topic = $refnode->getAttribute('topic');
		my $resource = $refnode->getAttribute('resource');

		# Topics of two characters or fewer are skipped.
		next unless length($topic) > 2;

		# Same sanitising and prefixing scheme as the entry tags.
		$topic =~ tr/-_a-zA-Z0-9/_/c;
		$topic = $resource."_".$topic if length($resource) > 0;

		if ($mode == 1) {
			$text .= " <a href><b>$label</b></a href=\"$topic\"><br/>";
		}
		elsif ($mode == 2) {
			$text .= get_inline_content($label, $topic, 1);
		}
	}

	return $text;
}

# Produce an inline copy of a pre-scanned entry, preceded by a caption line.
#
# Params:  $label    - caption text
#          $entrytag - key into %cont_hash / %head_hash (see prescan_content)
#          $formtype - 0: caption without headword, body indented one tab
#                         level deeper and closed with <br/>;
#                      otherwise: caption with the cached headword (when
#                         there is one) and body closed with <br2/>
# Returns: the caption followed by the (possibly empty) cached body.
sub get_inline_content {
	my ($label, $entrytag, $formtype) = @_;
	my $text = "";

	# Entries that were never pre-scanned simply contribute an empty body.
	my $body = $cont_hash{$entrytag};
	$body = "" unless defined $body;
	my $headword2 = $head_hash{$entrytag};
	$headword2 = "" unless defined $headword2;

	if (($headword2 eq "") || ($formtype == 0)) {
		$text .= "<tab2/><b>&lt;$label&gt;</b><br/>";
	} else {
		$text .= "<tab2/><b>&lt;$label\:</b> $headword2<b>&gt;</b><br/>";
	}

	if ($formtype == 0) {
		# Shift deepest level first so no tab marker is bumped twice.
		$body =~ s/<tab4\/>/<tab5\/>/g;
		$body =~ s/<tab3\/>/<tab4\/>/g;
		$body =~ s/<tab2\/>/<tab3\/>/g;
		$text .= $body."<br/>";
	} else {
		$text .= $body."<br2/>";
	}

	# Return explicitly instead of relying on the value of the last
	# statement evaluated inside the if/else above.
	return $text;
}

# Build the <entry cond="..." head="..."/> element for an example sentence.
#
# Param:   $buffer - example text, possibly containing markup tags
# Returns: the element as a string; "head" is the cleaned-up display text
#          and "cond" the same text reduced to a search key.
sub get_condsearch_content {
	my ($buffer) = @_;

	# Both attributes start from the text with all tags removed.
	$buffer =~ s/<.*?>//g;
	my $resource = $buffer;

	# head: drop a leading U+2027 (matched on its UTF-8 bytes), remove '|'
	# and trim surrounding whitespace.
	utf8::encode($buffer);
	$buffer =~ s/^\xe2\x80\xa7//;
	utf8::decode($buffer);
	for ($buffer) {
		s/\|//g;
		s/^\s+//;
		s/\s+$//;
	}

	# cond: map symbols for searching, then strip entities and punctuation,
	# collapse whitespace runs and trim.
	$resource = replace_symbol_search_word($resource);
	utf8::decode($resource);
	for ($resource) {
		s/&lt;//g;
		s/&gt;//g;
		s/&quot;//g;
		s/[.?&:();=\'\!\/\+\-\*\,\#\[\]\`\{\}\|]//g;
		s/\s+/ /g;
		s/^\s+//;
		s/\s+$//;
	}

	return "<entry cond=\"".$resource."\" head=\"".$buffer."\"/>";
}

# Recursively convert the children of a DOM node into the output markup.
#
# Params:  $doc  - node whose child nodes are converted (the document on the
#                  outermost call, an element on recursive calls)
#          $kind - entry kind, passed unchanged through the recursion and
#                  consulted by a few branches ('Collos', 'category')
# Returns: the accumulated text; undef when no child contributed anything,
#          because $text is only ever appended to.
#
# The element handling is one long if/elsif chain, so the FIRST matching
# branch wins and the order of the branches is significant.
sub get_text {
    my($doc) = shift @_;
    my($kind) = shift @_;
    my $node;
    my $text;
    my $buffer;
    my $resource;

	foreach $node ($doc->getChildNodes) {
		my $parent_node_name = $node->getParentNode->getNodeName;
		# Leading/trailing whitespace captured so it can be kept outside
		# the "[...]" decoration added by several branches below.
		my $spcpre  = "";
		my $spcpost = "";
		if ($node->getNodeType == ENTITY_NODE) {
			print $node->getNotationName, "\n";
		}
		elsif ($node->getNodeType == TEXT_NODE) {
			$buffer = $node->getNodeValue;
			# NOTE(review): no /g here, so only the first run of tabs is replaced.
			$buffer =~ s/\t+/ /;
			$buffer =~ s/</&lt;/g;
			$buffer =~ s/>/&gt;/g;
			$buffer =~ s/\"/&quot;/g;
			# Text directly inside these elements is rendered bold.
			if($parent_node_name eq 'b'
			   || $parent_node_name eq 'SYN'
			   || $parent_node_name eq 'OPP'
			   || $parent_node_name eq 'PTandPP'
			   || $parent_node_name eq 'PASTTENSE'
			   || $parent_node_name eq 'PASTPART'
			   || $parent_node_name eq 'PRESPART'
			   || $parent_node_name eq 'DEFBOLD'
			   || $parent_node_name eq 'THESPROPFORM' # Side Panel Thesaurus
			   || $parent_node_name eq 'hwd'          # Side Panel Thesaurus
			   || $parent_node_name eq 'AUX'		  # Verb form
			   ){
				$text .= "<b>".$buffer."</b>";
			}
			# Text directly inside these elements is rendered italic.
			elsif ($parent_node_name eq 'THESEXA'  # Side Panel Thesaurus
				   || $parent_node_name eq 'pos'      # Side Panel Thesaurus
				   || $parent_node_name eq 'COLLEXA'  # Side Panel Collocations
				   ) { 
				$text .= "<i>".$buffer."</i>";
			}
            else {
				$text .= $buffer;
			}
		}
		elsif ($node->getNodeType == ELEMENT_NODE) {
			my $node_name = $node->getNodeName;
			#print "TAG:\<$node_name\>\n" if $parse_debug;
			# Elements dropped entirely (children are not visited).
			if($node_name eq 'INFLX'
			   || $node_name eq 'HWD'
			   || $node_name eq 'HYPHENATION'
			   || $node_name eq 'HOMNUM'
			   || $node_name eq 'ACTIV'
			   || $node_name eq 'number'
			   ){
			}
			# Bold wrappers
			elsif ($node_name eq 'EXP'				# THESAURUS box
				   || $node_name eq 'EXPR'			# Gram box
				   || $node_name eq 'LEXVAR'		# RunOn
				   || $node_name eq 'LEXUNIT'		# RunOn
				   || $node_name eq 'RELATEDWD'		# RunOn
				   || ($node_name eq 'BASE' && $parent_node_name eq 'DERIV')				# RunOn
				   || ($node_name eq 'span' && $node->getAttribute('class') eq 'exabullet')	# SidePanel
				   ) {
				$text .= "<b>".get_text($node, $kind)."</b>";
			}
			# Italic wrappers
			elsif ($node_name eq 'POS'			# RunOn
				   || $node_name eq 'LINKWORD'	# RunOn
				   || ($node_name eq 'span' && $node->getAttribute('class') eq 'infllab')	# RunOn
				   || ($node_name eq 'span' && $node->getAttribute('class') eq 'italic')	# RunOn
				   || $node_name eq 'ORIGIN'	# Word origin
				   ) {
				$text .= "<i>".get_text($node, $kind)."</i>";
			}
			elsif (($node_name eq 'BASE' && $parent_node_name eq 'EXAMPLE')
				   || ($node_name eq 'BASE' && $parent_node_name eq 'THESEXA')				# THESAURUS box
				   ) {
				# example (LDOCE5: BASE + parent::EXAMPLE)
				# example (THESAURUS/THESAURUS BOX: BASE + parent::THESEXA)
				$buffer = get_text($node, $kind);
				$text .= get_condsearch_content($buffer) if $condsearch;
				$text .= "<i>".$buffer."</i>";
			}
			# Superscript wrappers
			elsif($node_name eq 'REFHOMNUM'		# RunOn
				  || $node_name eq 'REFHOM'		# Word origin
				  ){
				$text .= "<sup>".get_text($node, $kind)."</sup>";
			}
			# Content followed by a line break
			elsif($node_name eq 'Head'
				  || $node_name eq 'Section'	# XXX box
				  || $node_name eq 'Collocate'	# COLLOCATIONS box
				  || $node_name eq 'Exponent'	# THESAURUS box
				  ){
				$text .= get_text($node, $kind)."<br/>";
			}
			elsif($node_name eq 'span' && $node->getAttribute('class') eq 'sensenum'		# RunOn
				  || ($node_name eq 'span' && $node->getAttribute('class') eq 'lead')		# Word origin
				  ){	
				$text .= "<b>".get_text($node, $kind)."</b> ";
			}
			elsif(($node_name eq 'span' && $node->getAttribute('class') eq 'spokensect')
				  || ($node_name eq 'span' && $node->getAttribute('class') eq 'heading')	# XXX box
				  ){
				$text .= "<b>".get_text($node, $kind)."</b><br/>";
			}
			# Blocks indented with <tab2/>
			elsif($node_name eq 'RunOn'			# RunOn
				  || $node_name eq 'exp-head'	# Side Panel Thesaurus
				  || $node_name eq 'ws-head'	# Side Panel Thesaurus
				  || $node_name eq 'group'		# Word family
				  ){
				$text .= "<tab2/>".get_text($node, $kind)."<br/>";
			}
			elsif($node_name eq 'Subsense'
				  || $node_name eq 'Tail'
				  || $node_name eq 'Hint'		# XXX box
				  || $node_name eq 'SENSE'		# Word origin
				  ){
				$text .= "<br/><tab2/>".get_text($node, $kind)."<br/>";
			}
			elsif($node_name eq 'PhrVbEntry'
				  || $node_name eq 'SpokenSect'){
				$text .= "<br2/><tab2/>".get_text($node, $kind)."<br/>";
			}
			# Blocks indented with <tab3/> / <tab4/>
			elsif ($node_name eq 'PronCodes' && $parent_node_name eq 'exp-body') {	# Side Panel Thesaurus
				$text .= "<tab3/>".get_text($node, $kind);
			}
			elsif(($node_name eq 'DEF' && $parent_node_name eq 'exp-body')) {		# Side Panel Thesaurus
				$text .= "<tab3/>".get_text($node, $kind)."<br/>";
			}
			elsif($node_name eq 'EXAMPLE'
				  || ($node_name eq 'THESEXA' && $parent_node_name eq 'exp-body')					# Side Panel Thesaurus
				  || ($node_name eq 'THESPROPFORM' && $node->getAttribute('class') eq 'newline')	# Side Panel Thesaurus
				  || ($node_name eq 'GLOSS' && $parent_node_name eq 'exp-body')						# Side Panel Thesaurus
				  || ($node_name eq 'GLOSS' && $parent_node_name eq 'coll-body')					# Side Panel Collocations
				  || $node_name eq 'tr'																# Verb form
				  ) {
				$text .= "<br/><tab3/>".get_text($node, $kind)."<br/>";
			}
			elsif($node_name eq 'GLOSS' && $parent_node_name eq 'Propexa') {		# Side Panel Thesaurus
				$text .= "<br/><tab4/>".get_text($node, $kind)."<br/>";
			}
			elsif($node_name eq 'THESEXA' && $parent_node_name eq 'Propexa') {	# Side Panel Thesaurus
				# example (THESAURUS: THESEXA + parent::Propexa)
				$buffer = get_text($node, $kind);
				$text .= "<br/><tab4/>";
				$text .= get_condsearch_content($buffer) if $condsearch;
				$text .= $buffer."<br/>";
			}
			# NOTE(review): 'Section' is already matched by an earlier branch,
			# so the 'Section' test below can never be the one that fires.
			elsif($node_name eq 'exa-body'		# Side Panel Examples
				  || $node_name eq 'Section'	# Side Panel Thesaurus
				  || $node_name eq 'category'	# Side Panel Thesaurus
				  || $node_name eq 'Collos'		# Side Panel Collocations
				  || $node_name eq 'Phrases'	#Side Panel Phrases
				  ) {
				$text .= get_text($node, $kind)."<br2/>";
			}
			# Labels rendered as "[...]", keeping outer whitespace outside
			# the brackets.
			elsif($node_name eq 'SIGNPOST'		# RunOn
				  || $node_name eq 'LABEL'		# Side Panel Examples
				  ){
				$buffer = get_text($node, $kind);
				if ($buffer =~ /^(\s*)/) {$spcpre = $1;}
				if ($buffer =~ /(\s*)$/) {$spcpost = $1;}
				$buffer =~ s/^\s+//;
				if ($parent_node_name eq 'exp-body') {
					$text .= "<tab3/>".$spcpre."[".$buffer."]".$spcpost;
				}
				elsif ($parent_node_name eq 'Propexa') {
					$text .= "<tab4/>".$spcpre."[".$buffer."]".$spcpost;
				}
				else {
					$text .= $spcpre."[".$buffer."]".$spcpost;
				}
			}
			
			# A Sense with an id gets a named anchor (id sanitised like the
			# entry tags) so that it can be linked to.
			elsif($node_name eq 'Sense'){
				$buffer = $node->getAttribute('id');
				if($buffer ne ''){
					$buffer =~ tr/-_a-zA-Z0-9/_/c;
					$text .= "<tab2/><a name=\"$buffer\"/>".get_text($node, $kind)."<br/>";
				}
				else{
					$text .= "<tab2/>".get_text($node, $kind)."<br/>";
				}
			}
			# Only assets of type 'inline' are rendered here; other
			# SE_EntryAssets elements produce no output in this sub.
			elsif($node_name eq 'SE_EntryAssets'){
				if ($node->getAttribute('type') eq 'inline') {
					$text .= "<br/>".get_text($node, $kind)."<br/>";
				}
			}

			## COLLOCATIONS box, THESAURUS box, Grammar box, Register box
			## (each box is emitted only when its configuration flag is set)
			elsif($node_name eq 'ColloBox'){
				if ($parent_node_name eq 'Collos') {
					$text .= get_text($node, $kind);
				}
				else {
					$text .= "<br2/><tab2/>".get_text($node, $kind)."<br2/>" if $collobox;
				}
			}
			elsif($node_name eq 'ThesBox'){
				$text .= "<br2/><tab2/>".get_text($node, $kind)."<br2/>" if $thesbox;
			}
			elsif($node_name eq 'GramBox'){
				$text .= "<br2/><tab2/>".get_text($node, $kind)."<br2/>" if $grambox;
			}
			elsif($node_name eq 'F2NBox'){
				$text .= "<br/><tab2/>".get_text($node, $kind)."<br/>" if $f2nbox;
			}
			elsif($node_name eq 'HEADING' || $node_name eq 'SECHEADING'){
				$buffer = get_text($node, $kind);
				if ($buffer =~ /^(\s*)/) {$spcpre = $1;}
				if ($buffer =~ /(\s*)$/) {$spcpost = $1;}
				$buffer =~ s/^\s+//;
				if ($kind eq 'Collos') {
					$text .= "<tab2/>".$spcpre."<b>[".$buffer."]".$spcpost."</b><br/>";
				}
				elsif ($parent_node_name eq 'Section') {
					# Heading text is dropped here; only a line break is emitted.
					$text .= "<br/>";
				}
				else {
					$text .= $buffer."<br/>";
				}
			}

			# COLLOCATIONS box
			elsif($node_name eq 'COLLOC'){
				if ($parent_node_name eq 'coll-head') {
					$text .= "<tab2/><b>".get_text($node, $kind)."</b><br/>";
				}
				else {
					$text .= "<b>".get_text($node, $kind)."</b>";
				}
			}
			elsif($node_name eq 'COLLEXA'){
				if ($parent_node_name eq 'coll-body') {
					# example (COLLOCATIONS)
					$buffer = get_text($node, $kind);
					$text .= "<br/><tab3/>";
					$text .= get_condsearch_content($buffer) if $condsearch;
					$text .= $buffer."<br/>";
				}
				else {
					# example (COLLOCATION BOX)
					$buffer = get_text($node, $kind);
					$text .= get_condsearch_content($buffer) if $condsearch;
					$text .= $buffer;
				}
			}

			# RunOn
			elsif($node_name eq 'PHRVBHWD'){	# RunOn
				$buffer = get_text($node, $kind);
				if ($phrasalverb) {
					# Emit a searchable <entry> for the phrasal verb; the
					# search key is the text up to the first comma.
					$resource = $buffer;
					$resource =~ s/,.*$//g;
					$resource = replace_symbol_search_word($resource);
					utf8::decode($resource);
					$text .= "<entry tag=\"".$resource."\" head=\"".$buffer."\"/><tab2/>";
				}
				$text .= "<b>".$buffer."</b> ";
			}
			elsif($node_name eq 'FREQ' || $node_name eq 'AC'){
				$buffer = get_text($node, $kind);
				if ($buffer =~ /^(\s*)/) {$spcpre = $1;}
				if ($buffer =~ /(\s*)$/) {$spcpost = $1;}
				$buffer =~ s/^\s+//;
				$text .= $spcpre."[".$buffer."]".$spcpost;
			}
			elsif($node_name eq 'REGISTERLAB'){
				$buffer = get_text($node, $kind);
				if ($buffer =~ /^(\s*)/) {$spcpre = $1;}
				if ($buffer =~ /(\s*)$/) {$spcpost = $1;}
				$buffer =~ s/^\s+//;
				if ($parent_node_name eq 'coll-body') {
					$text .= "<tab3/>".$spcpre."[".$buffer."]".$spcpost;
				}
				else {
					$text .= $spcpre."[".$buffer."]".$spcpost;
				}
			}
			elsif($node_name eq 'GRAM'){
				$buffer = get_text($node, $kind);
				if($gramshortform){
					$buffer = replace_gram_word($buffer);
				}
				$text .= $buffer;
			}
			elsif($node_name eq 'GEO'
				  || ($node_name eq 'span' && $node->getAttribute('class') eq 'geo')
				  ){
				$buffer = get_text($node, $kind);
				if ($buffer =~ /^(\s*)/) {$spcpre = $1;}
				if ($buffer =~ /(\s*)$/) {$spcpost = $1;}
				$buffer =~ s/^\s+//;
				if($geoshortform){
					$buffer = replace_geo_word($buffer);
				}
				if ($parent_node_name eq 'coll-body') {
					$text .= "<tab3/>".$spcpre."[".$buffer."]".$spcpost."<br/>";
				}
				elsif ($parent_node_name eq 'td') {
					$text .= $spcpre.$buffer.$spcpost;
				}
				else {
					$text .= $spcpre."[".$buffer."]".$spcpost;
				}
			}
			elsif($node_name eq 'span' && $node->getAttribute('class') eq 'synopp'){
				$buffer = get_text($node, $kind);
				if ($buffer =~ /^(\s*)/) {$spcpre = $1;}
				if ($buffer =~ /(\s*)$/) {$spcpost = $1;}
				$buffer =~ s/^\s(\S*)\s/$1/g;
				$text .= $spcpre."[".$buffer."]".$spcpost;
			}
			elsif($node_name eq 'NonDV'){
				# Unwrap a nested <nw>...</nw> so the result is wrapped only once.
				$buffer = get_text($node, $kind);
				$buffer =~ s/<nw>(.*)<\/nw>/$1/g;
				$text .= "<nw>".$buffer."</nw>";
			}
			# Cross references: depending on the 'resource' attribute and the
			# matching configuration value, emit a link (1) or inline the
			# referenced content (2).
			elsif($node_name eq 'Ref'){
				$buffer = $node->getAttribute('topic');
				$resource = $node->getAttribute('resource');
				if(length($buffer) > 2){
					$buffer =~ tr/-_a-zA-Z0-9/_/c;
					if ($resource eq 'word_families') {
						$text .= " <a href>Word family</a href=\"word_families_$buffer\">" if $wdfamily == 1;
						$text .= get_inline_content("Word family", "word_families_$buffer", 0) if $wdfamily == 2;
					} elsif ($resource eq 'etymologies') {
						$text .= " <a href>Word origin</a href=\"etymologies_$buffer\">" if $wdorigin == 1;
						$text .= get_inline_content("Word origin", "etymologies_$buffer", 0) if $wdorigin == 2;
					} elsif ($resource eq 'verb_forms') {
						$buffer = "verb_forms_".$buffer;
						# Targets matching $skipverbform produce no output.
						if ($buffer =~ /$skipverbform/) {
						} else {
							$text .= " <a href>Verb form</a href=\"$buffer\">" if $verbform == 1;
							$text .= get_inline_content("Verb form", "$buffer", 0) if $verbform == 2;
						}
					} elsif ($resource eq 'menus') {
						# skip menu
					} else {
						# Build the link target the same way get_entrytag() does:
						# resource prefix, or "etymologies_" for short ids.
						if (length($resource) > 0 && $resource ne 'LdoceAZ') {
							$buffer = "${resource}_$buffer";
						} elsif (length($buffer) < 6) {
							$buffer = "etymologies_$buffer";
							if ($buffer =~ /$skipwdorigin/) {
								$buffer = "";
							}
						}
						if ($buffer eq "") {
							$text .= get_text($node, $kind);
						} elsif ($parent_node_name eq 'ws-body') {
							$text .= "<tab3/><a href>".get_text($node, $kind)."</a href=\"$buffer\"><br/>";
						} elsif ($parent_node_name eq 'phrase-head') {
							$text .= "<tab2/><b><a href>".get_text($node, $kind)."</a href=\"$buffer\"></b><br/>";
						} else {
							$text .= "<a href>".get_text($node, $kind)."</a href=\"$buffer\">";
						}
					}
				}
				else{
					# Topic too short to be a link target: keep the text only.
					$text .= get_text($node, $kind);
				}
			}
			elsif($node_name eq 'COLLO'||$node_name eq 'PROPFORMPREP'||$node_name eq 'PROPFORM'){
				$text .= "<br/><tab3/><b>".get_text($node, $kind)."</b>";
			}
			elsif($node_name eq 'PRON'){
				$text .= get_text($node, $kind);
			}

			# Illustration
			elsif($node_name eq 'ILLUSTRATION') {
				if ($image_type) {
					$buffer = $node->getAttribute('caption');
					$resource = $node->getAttribute('thumb');
					$resource =~ s/.*[\\\/]//g; # strip the directory part
					$resource =~ s/\..+$//g;    # strip everything from the first dot
					$resource =~ tr/-_a-zA-Z0-9/_/c; # replace all other characters with '_'
					$text .= "<br/><bmp tag=\"$resource\" caption=\"$buffer\"/><br/>";
				}
			}
			# Pronunciation/Example/Effective sounds
			elsif($node_name eq 'Audio') {
				if ($sound_type) {
					$buffer = $node->getAttribute('resource');
					$resource = $node->getAttribute('topic');
					$resource =~ s/.*[\\\/]//g; # strip the directory part
					$resource =~ s/\..+$//g;    # strip everything from the first dot
					$resource =~ tr/-_a-zA-Z0-9/_/c; # replace all other characters with '_'
					if ($buffer eq 'GB_HWD_PRON') {
						$text .= " <wav tag=\"$resource\" caption=\"GB\"/> ";
					}
					elsif ($buffer eq 'US_HWD_PRON') {
						$text .= "<wav tag=\"$resource\" caption=\"US\"/> ";
					}
					elsif ($buffer eq 'EXA_PRON') {
						$text .= "<wav tag=\"$resource\" caption=\"EXA\"/> ";
					}
					elsif ($buffer eq 'SFX') {
						$text .= "<wav tag=\"$resource\" caption=\"SFX\"/> ";
					}
				}
			}
			# Side Panel Examples
			elsif($node_name eq 'exa-head') {
				$text .= "<br/>";
			}
			elsif($node_name eq 'exa') {
				if ($parent_node_name eq 'phrase-body') {
					# example (PHRASE BANK)
					$buffer = get_text($node, $kind);
					$text .= get_condsearch_content($buffer) if $condsearch;
					$text .= "<tab3/>".$buffer."<br/>";
				}
				else {
					# example (EXAMPLE BANK)
					$buffer = get_text($node, $kind);
					$text .= get_condsearch_content($buffer) if $condsearch;
					$text .= "<tab2/>".$buffer."<br/>";
				}
			}
			# Side Panel Thesaurus
			elsif ($node_name eq 'pos' && $kind eq 'category') {
				$text .= ", ".get_text($node, $kind);
			}
			# Word family
			elsif ($node_name eq 'w') {
				$text .= "<br/><tab3/>{hc-u-c2b7}".get_text($node, $kind)."<br/>";
			}
			# Word origin
			elsif ($node_name eq 'br') {
				$text .= "<br/>".get_text($node, $kind);
			}
			# Verb form
			elsif ($node_name eq 'FORM') {
				$text .= "<br/><tab2/><b>\[".get_text($node, $kind)."\]</b><br/>";
			}
			elsif ($node_name eq 'td') {
				$buffer = $node->getAttribute('class');
				$resource = get_text($node, $kind);
				if ($buffer eq 'COLUMN1') {
					# An empty first column repeats the last non-empty one,
					# remembered in the file-level $td_col1.
					if ($resource eq '') {
						$resource = $td_col1;
					} else {
						$td_col1 = $resource;
					}
					$text .= $resource.": ";
				} elsif ($buffer eq 'COLUMN2') {
					$text .= "\(".$resource."\) ";
				}else {
					$text .= $resource;
				}
			}
			# Go to child-node
			else{
				print "SKIP '$kind' node <$node_name> ...\n" if $parse_debug;
				$text .= get_text($node, $kind);
			}
		}
	}

    return $text;
}

# Apply every %symbol_table substitution to a string.
#
# The string is UTF-8 encoded first, so the table keys are matched (as
# regular expressions) against bytes.  Any non-ASCII bytes still present
# afterwards are reported on STDOUT as "[T] <hex>,<hex>,...".
#
# Param:   $str - text to convert
# Returns: the converted, UTF-8 encoded string.
sub replace_symbol {
	my ($str) = @_;

	utf8::encode($str);
	for my $pattern (keys %symbol_table) {
		my $replacement = $symbol_table{$pattern};
		$str =~ s/$pattern/$replacement/g;
	}

	# Report whatever the table did not cover.
	my $leftover = join '', map { sprintf('%X,', ord($_)) } ($str =~ /[^\x00-\x7F]/g);
	print "[T] $leftover\n" if $leftover ne "";

	return $str;
}

# Apply every %symbol_search_word substitution to a search word.
#
# Works like replace_symbol() but uses the search-word table, collapses each
# run of whitespace to a single whitespace character, and reports leftover
# non-ASCII bytes on STDOUT as "[S] <hex>,<hex>,...".
#
# Param:   $str - search word to convert
# Returns: the converted, UTF-8 encoded string.
sub replace_symbol_search_word {
	my ($str) = @_;

	utf8::encode($str);
	for my $pattern (keys %symbol_search_word) {
		my $replacement = $symbol_search_word{$pattern};
		$str =~ s/$pattern/$replacement/g;
	}
	$str =~ s/(\s)+/$1/g;

	# Report whatever the table did not cover.
	my $leftover = join '', map { sprintf('%X,', ord($_)) } ($str =~ /[^\x00-\x7F]/g);
	print "[S] $leftover\n" if $leftover ne "";

	return $str;
}

1;
