/* Copyright (C) 2007 One Laptop Per Child
 * Author: Marc Maurer <uwog@uwog.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#include <libxml/tree.h>

#include "ut_assert.h"
#include "ut_debugmsg.h"

#include "LanguagePattern.h"

static bool s_get_bool_value(const string& val, bool default_value = true)
{
	if (val.size() == 0)
		return default_value;
	
	if (val == "TRUE" || val == "true" || val == "1")
		return true;
	else if (val == "FALSE" || val == "false" || val == "0")
		return false;

	UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
	return default_value;
}

/*
 * Reads the attribute 'prop' from 'node' and returns its value.
 *
 * If the attribute is not present, the same name prefixed with an
 * underscore ("_name" for "name") is tried as well; that spelling is
 * allowed too and marks the attribute as eligible for translation.
 *
 * Returns an empty string when node is NULL (after a failed
 * UT_return_val_if_fail check) or when neither spelling is present.
 */
static string s_get_translatable_property(xmlNode* node, string prop)
{
	UT_return_val_if_fail(node, "");

	xmlChar* value = xmlGetProp(node, reinterpret_cast<const xmlChar*>(prop.c_str()));
	if (!value) // no worries, _name is allowed too, meaning it is eligible for translation
	{
		prop.insert(0, "_");
		value = xmlGetProp(node, reinterpret_cast<const xmlChar*>(prop.c_str()));
	}

	if (!value)
		return "";

	// xmlGetProp hands back a freshly allocated copy that the caller owns;
	// take our own copy and release libxml2's buffer so it does not leak.
	string result(reinterpret_cast<const char*>(value));
	xmlFree(value);
	return result;
}

/*
 * Builds the LanguagePattern object(s) described by one XML pattern element
 * and appends them to 'patterns'.
 *
 *  - "escape-char" yields a plain LanguagePattern of type ESCAPE_CHAR; its
 *    attributes and child elements are not parsed.
 *  - "keyword-list" is expanded into one KeywordLanguagePattern per keyword.
 *  - "string", "line-comment", "block-comment", "syntax-item" and
 *    "pattern-item" each yield one pattern of the matching class, filled in
 *    through _parseAttribs() and _parseElements().
 *  - any other element name is reported and nothing is appended.
 *
 * The appended objects are allocated with new; nothing in this function
 * releases them afterwards.
 */
void LanguagePattern::construct(xmlNode* pattern, vector<LanguagePattern*>& patterns)
{
	UT_return_if_fail(pattern);
	UT_return_if_fail(pattern->type == XML_ELEMENT_NODE);

	const char* szTag = reinterpret_cast<const char*>(pattern->name);

	if (strcmp(szTag, "escape-char") == 0)
	{
		UT_DEBUGMSG(("Pattern match: %s\n", szTag));
		LanguagePattern* pEscape = new LanguagePattern();
		pEscape->type = LanguagePattern::ESCAPE_CHAR;
		patterns.push_back(pEscape);
		return;
	}

	if (strcmp(szTag, "keyword-list") == 0)
	{
		xxx_UT_DEBUGMSG(("Pattern match: %s\n", szTag));

		// the list object is only a temporary holder for the keywords and
		// the settings they share; it is deleted once it has been expanded
		KeywordListLanguagePattern* pList = new KeywordListLanguagePattern();
		pList->_parseAttribs(pattern);
		pList->_parseElements(pattern);

		const bool bHasBeginning = pList->attr_beginning_regex.size() > 0;
		const bool bHasEnd = pList->attr_end_regex.size() > 0;

		for (vector<string>::iterator kw = pList->m_vKeywords.begin(); kw != pList->m_vKeywords.end(); ++kw)
		{
			// TODO: make a nice copy function
			KeywordLanguagePattern* pKeyword = new KeywordLanguagePattern();
			pKeyword->name = pList->name;
			pKeyword->style = pList->style;
			pKeyword->caseSensitive = pList->caseSensitive;
			pKeyword->matchEmptyStringAtBeginning = pList->matchEmptyStringAtBeginning;
			pKeyword->matchEmptyStringAtEnd = pList->matchEmptyStringAtEnd;

			if (bHasBeginning)
			{
				pKeyword->start_regex = pList->attr_beginning_regex;
				if (bHasEnd)
					pKeyword->end_regex = pList->attr_end_regex;
				else
					pKeyword->endAtLineEnd = true; // no end regex given: run to the end of the line
			}
			else if (bHasEnd)
			{
				UT_DEBUGMSG(("pPattern->attr_beginning_regex.size() == 0 && pPattern->attr_end_regex.size() > 0 for pattern %s\n", pList->name.c_str()));
				// TODO: is this a correct interpretation?
				pKeyword->start_regex = *kw; // shouldn't we add \\b before this?
				pKeyword->end_regex = pList->attr_end_regex;
			}
			else
			{
				// plain keyword: match it as a whole word
				pKeyword->regex = "\\b" + *kw + "\\b";
			}

			patterns.push_back(pKeyword);
		}

		DELETEP(pList);
		return;
	}

	// The remaining pattern kinds differ only in the class that gets
	// instantiated; parsing and registration are the same for all of them.
	LanguagePattern* pGeneric = NULL;
	if (strcmp(szTag, "string") == 0)
		pGeneric = new StringLanguagePattern();
	else if (strcmp(szTag, "line-comment") == 0)
		pGeneric = new LineCommentLanguagePattern();
	else if (strcmp(szTag, "block-comment") == 0)
		pGeneric = new BlockCommentLanguagePattern();
	else if (strcmp(szTag, "syntax-item") == 0)
		pGeneric = new SyntaxItemLanguagePattern();
	else if (strcmp(szTag, "pattern-item") == 0)
		pGeneric = new PatternItemLanguagePattern();

	if (!pGeneric)
	{
		UT_DEBUGMSG(("Unknown pattern match: %s\n", szTag));
		UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED);
		return;
	}

	xxx_UT_DEBUGMSG(("Pattern match: %s\n", szTag));
	pGeneric->_parseAttribs(pattern);
	pGeneric->_parseElements(pattern);
	patterns.push_back(pGeneric);
}

/*
 * Reads the attributes shared by every pattern kind ("name" and "style")
 * from patternNode into this object. Either attribute may also be given
 * with a leading underscore; a missing attribute becomes an empty string.
 */
void LanguagePattern::_parseAttribs(xmlNode* patternNode)
{
	UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE);

	this->name = s_get_translatable_property(patternNode, "name");
	this->style = s_get_translatable_property(patternNode, "style");
}

void LanguagePattern::_parseElements(xmlNode* patternNode)
{
	UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE);
	
	for (xmlNode* child = patternNode->children; child; child = child->next)
	{
		if (child->type == XML_ELEMENT_NODE)
		{
			/* get the generic regular expressions only */
			
			if (strcmp(reinterpret_cast<const char*>(child->name), "regex") == 0)
			{
				regex = reinterpret_cast<const char*>(xmlNodeGetContent(child));
				xxx_UT_DEBUGMSG(("Found regex: %s\n", start_regex.c_str()));
			}			
			else if (strcmp(reinterpret_cast<const char*>(child->name), "start-regex") == 0)
			{
				start_regex = reinterpret_cast<const char*>(xmlNodeGetContent(child));
				xxx_UT_DEBUGMSG(("Found start-regex: %s\n", start_regex.c_str()));
			}
			else if (strcmp(reinterpret_cast<const char*>(child->name), "end-regex") == 0)
			{
				string tmp_end_regex = reinterpret_cast<const char*>(xmlNodeGetContent(child));
				if (strcmp(tmp_end_regex.c_str(), "\\n") == 0)
				{
					// this is both faster, and \n never really matches a line end in a regex
					// not sure if i consider this a hack, or a nice optimization
					endAtLineEnd = true;
				}
				else
					end_regex = tmp_end_regex;
				xxx_UT_DEBUGMSG(("Found end-regex: %s\n", end_regex.c_str()));				
			}
			else 
			{
				UT_DEBUGMSG(("Unimplemented pattern element: %s\n", child->name));
				UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED);
			}
		}
	}
}

/*
 * Reads the attributes of a keyword-list element: the generic ones handled
 * by LanguagePattern::_parseAttribs(), followed by the keyword-list
 * specific flags and regular expressions.
 *
 * Defaults applied when a flag attribute is absent or empty:
 *   case-sensitive                   true
 *   match-empty-string-at-beginning  false
 *   match-empty-string-at-end        false
 */
void KeywordListLanguagePattern::_parseAttribs(xmlNode* patternNode)
{
	UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE);

	// generic attributes first
	LanguagePattern::_parseAttribs(patternNode);

	// boolean flags specific to keyword lists
	const string sCaseSensitive = s_get_translatable_property(patternNode, "case-sensitive");
	caseSensitive = s_get_bool_value(sCaseSensitive, true);

	const string sEmptyAtBeginning = s_get_translatable_property(patternNode, "match-empty-string-at-beginning");
	matchEmptyStringAtBeginning = s_get_bool_value(sEmptyAtBeginning, false);

	const string sEmptyAtEnd = s_get_translatable_property(patternNode, "match-empty-string-at-end");
	matchEmptyStringAtEnd = s_get_bool_value(sEmptyAtEnd, false);

	// FIXME: is beginning-regex/end_regex really translatable?
	attr_beginning_regex = s_get_translatable_property(patternNode, "beginning-regex");
	attr_end_regex = s_get_translatable_property(patternNode, "end-regex");
}

/*
 * Collects the text of every <keyword> child element of patternNode into
 * m_vKeywords, in document order.
 *
 * Non-element children are skipped. Any element other than <keyword> is
 * reported and triggers UT_ASSERT. A <keyword> for which libxml2 returns
 * no content at all (NULL) is skipped.
 */
void KeywordListLanguagePattern::_parseElements(xmlNode* patternNode)
{
	UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE);

	for (xmlNode* child = patternNode->children; child; child = child->next)
	{
		if (child->type != XML_ELEMENT_NODE)
			continue;

		if (strcmp(reinterpret_cast<const char*>(child->name), "keyword") == 0)
		{
			// xmlNodeGetContent() allocates the buffer it returns: fetch it
			// once, copy it into the vector, then hand it back to libxml2
			xmlChar* content = xmlNodeGetContent(child);
			if (content)
			{
				xxx_UT_DEBUGMSG(("Found keyword: %s\n", reinterpret_cast<const char*>(content)));
				m_vKeywords.push_back(reinterpret_cast<const char*>(content));
				xmlFree(content);
			}
		}
		else
		{
			UT_DEBUGMSG(("Unimplemented pattern element: %s\n", child->name));
			UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
		}
	}
}
