// 实现Lua脚本预处理工具

#include <map>
using namespace std;


/*****************************************************************************************************************************************************************************
*
* 一个极其简单的代码预处理工具
* 仿造C语言的预处理,对和C语言变量名命名规则有相同规则的语言进行预处理,
* 经典例子:Lua脚本的预处理,通过define 将全局变量名称转为不可读变量(类似混淆,但实际解决的问题是local变量不足时用全局变量代替,但又不希望发布的脚本中带有任何有意义的信息)
* 同时,扩充一些支持比较的预处理和命令行预置宏,处理版本兼容时的条件编译
* 这个工具只实现以下功能:
* (1)#define Macro Code
* (2)#ifdef Macro ---#else --- #endif
* (3)#ifndef Macro --#else ---#endif
* (4)#undef Macro
*
* [*]此工程必须用unicode版本编译
*****************************************************************************************************************************************************************************/
// Source-language families the tool distinguishes. The quote/escape scanner
// and the line-comment stripper both branch on this value.
enum CodeType
{
	CT_none   = 0,
	CT_Lua    = 1,
	CT_Cpp    = 2,
	CT_Java   = 3,
	CT_Vb     = 4,
	CT_Delphi = 5,	// string escape character is '#' rather than '\'
};
// Language family currently in effect; GetCodeType() derives it from the input file name.
CodeType g_CodeType = CT_none;

// Character encodings an input file may use.
enum FileCodePageType
{
	FCP_none      = 0,
	FCP_Unicode   = 1,
	FCP_Utf8      = 2,
	FCP_Multibyte = 3,
};
// Encoding in effect for the file being processed.
FileCodePageType g_FCPType      = FCP_Multibyte;
// Encoding forced from the command line; FCP_none means "detect from the BOM".
FileCodePageType g_ForceFCPType = FCP_none;

map<CString, CString> g_mapMacro;
map<CStringA, CStringA> g_mapMacroA;

// Defines or redefines a wide-character macro: MacroName -> Code in g_mapMacro.
__inline void AddMacro(LPCWSTR MacroName, LPCWSTR Code)
{
	CString& slot = g_mapMacro[MacroName];	// creates the entry when it is missing
	slot = Code;
}

// Defines or redefines a narrow-character macro: MacroName -> Code in g_mapMacroA.
__inline void AddMacro(LPCSTR MacroName, LPCSTR Code)
{
	CStringA& slot = g_mapMacroA[MacroName];	// creates the entry when it is missing
	slot = Code;
}

// Removes the wide-character macro named lpMacro from g_mapMacro; does
// nothing when no such macro exists.
__inline void DeleteMacro(LPCWSTR lpMacro)
{
	const CString key(lpMacro);
	g_mapMacro.erase(key);	// erase-by-key is a no-op for an absent key
}

// Removes the narrow-character macro named lpMacro from g_mapMacroA.
// The project is built as Unicode, so a narrow (ANSI/UTF-8) source file must
// also be able to remove macros stored in the wide table (e.g. the ones preset
// by the program); the name is therefore widened and removed there as well.
__inline void DeleteMacro(LPCSTR lpMacro)
{
	const CStringA narrowName(lpMacro);
	g_mapMacroA.erase(narrowName);	// no-op when the name is absent

	const CStringW wideName(narrowName);
	DeleteMacro(wideName);
}

// Returns TRUE when a wide-character macro named lpMacro is defined in
// g_mapMacro, FALSE otherwise.
__inline BOOL EixstMacro(LPCWSTR lpMacro)
{
	return (g_mapMacro.count(lpMacro) != 0) ? TRUE : FALSE;
}

// Returns TRUE when a macro named lpMacro is defined in either table.
// The project is built as Unicode, so for narrow (ANSI/UTF-8) sources the
// wide table with the program's preset macros has to be searched as well.
__inline BOOL EixstMacro(LPCSTR lpMacro)
{
	// Narrow table first; the widening conversion is only paid for on a miss.
	if(g_mapMacroA.count(lpMacro) != 0)
		return TRUE;

	const CStringA narrowName(lpMacro);
	const CStringW wideName(narrowName);
	return (g_mapMacro.count(wideName) != 0) ? TRUE : FALSE;
}

// Looks up the replacement text of the wide-character macro lpMacro.
// On success stores it in Code and returns TRUE; otherwise returns FALSE and
// leaves Code untouched.
__inline BOOL GetMarcoCode(LPCWSTR lpMacro, CString& OUT Code)
{
	map<CString, CString>::iterator hit = g_mapMacro.find(lpMacro);

	if(hit == g_mapMacro.end())
		return FALSE;

	Code = hit->second;
	return TRUE;
}

// Looks up the replacement text of the narrow-character macro lpMacro in
// g_mapMacroA. On success stores it in Code and returns TRUE; otherwise
// returns FALSE and leaves Code untouched.
// NOTE(review): unlike EixstMacro(LPCSTR) and DeleteMacro(LPCSTR), this
// overload does not fall back to the wide table g_mapMacro - confirm whether
// that difference is intended.
__inline BOOL GetMarcoCode(LPCSTR lpMacro, CStringA& OUT Code)
{
	map<CStringA, CStringA>::iterator hit = g_mapMacroA.find(lpMacro);

	if(hit == g_mapMacroA.end())
		return FALSE;

	Code = hit->second;
	return TRUE;
}

// Stack of conditional-compilation states; the entry at the top tells whether
// code may currently be emitted. Slot 0 is the base level and starts enabled.
BOOL g_Stack_Enable[1000] = {1};
// Index of the current top of g_Stack_Enable. It is an index, not a flag, so
// it is declared int (the same underlying type as BOOL).
int g_StackEnableTop = 0;

// Opens a new conditional level whose code is emitted only when bEnable is
// non-zero. When the fixed-size stack is already full the push is refused and
// reported instead of writing past the end of g_Stack_Enable.
__inline void PushConditionEnableCode(BOOL bEnable)
{
	const int capacity = static_cast<int>(sizeof(g_Stack_Enable) / sizeof(g_Stack_Enable[0]));

	if(g_StackEnableTop + 1 >= capacity)
	{
		printf("Conditional directives nested too deeply (limit %d)\n", capacity - 1);
		return;
	}

	g_Stack_Enable[++g_StackEnableTop] = bEnable;
}

// Closes the innermost conditional level. The base level (index 0) is never
// popped, so an unmatched #endif cannot drive the index negative and make
// later stack reads go out of bounds.
__inline void PopCondition()
{
	if(g_StackEnableTop > 0)
		g_StackEnableTop--;
}

// Flips the enable state of the innermost conditional level (used for #else).
// The base level (index 0) is left alone: flipping it on an unmatched #else
// would disable all output that is outside any conditional.
__inline void InvCurrentCondition()
{
	if(g_StackEnableTop <= 0)
		return;

	g_Stack_Enable[g_StackEnableTop] = !(g_Stack_Enable[g_StackEnableTop]);
}

// Returns TRUE when the innermost conditional level allows code to be
// emitted, FALSE otherwise (the stored flag is normalised to TRUE/FALSE).
__inline BOOL CurrentConditionEnable()
{
	return g_Stack_Enable[g_StackEnableTop] ? TRUE : FALSE;
}

//////////////////////////字符串 和文件
// Writes cch wide characters starting at str to fp as raw bytes.
__inline void WriteToFile(FILE* fp, const wchar_t* str, int cch)
{
	const size_t byteCount = cch * sizeof(wchar_t);
	fwrite(str, 1, byteCount, fp);
}

// Writes cch narrow characters (bytes) starting at str to fp.
__inline void WriteToFile(FILE* fp, const char* str, int cch)
{
	const size_t byteCount = cch;
	fwrite(str, sizeof(char), byteCount, fp);
}

// Returns true when the first Len characters of pStr and pMat are equal.
// No terminator is looked for: both buffers must hold at least Len readable
// characters. A Len of zero or less always matches.
template<typename ChType>
bool StrMatch(const ChType* pStr, int Len, const ChType* pMat)
{
	int matched = 0;

	while(matched < Len && pStr[matched] == pMat[matched])
		++matched;

	return matched >= Len;
}

__inline void OutConsoleErrPreCompile(int lineNum, LPCWSTR lpStr, int cch)
{
	CString str(lpStr, cch);
	printf("Invalid PreCompile Cmd At Line %d: %s\n", lineNum, CStringA(str));
}

__inline void OutConsoleErrPreCompile(int lineNum, LPCSTR lpStr, int cch)
{
	CStringA str(lpStr, cch);
	CA2W ws((LPCSTR)str, (g_FCPType == FCP_Utf8 ? CP_UTF8 : CP_ACP));
	CStringW wstr(ws.m_psz);
	printf("Invalid PreCompile Cmd At Line %d: %s\n", lineNum, CStringA(wstr));
}

// Writes the single byte ch to fp.
__inline void WriteChar(FILE* fp, char ch)
{
	fputc(ch, fp);
}

// Writes the single wide character ch to fp as raw bytes (WCHAR is two bytes
// on Windows).
__inline void WriteChar(FILE* fp, WCHAR ch)
{
	fwrite(&ch, 1, sizeof(ch), fp);
}

// Forward declaration: WriteNameOrReplace and DoOutPutLineAndReplace call each
// other, and the template name must be known before it can be used with
// explicit template arguments.
template<typename ChType, typename strType>
void DoOutPutLineAndReplace(FILE* fp, const ChType* pString, int LineLen, int LineNumber);

// Emits one identifier. If nameX is a defined macro, its replacement text is
// written instead, and that text is itself scanned for further macros.
// A macro that is already being expanded is written literally rather than
// expanded again, so a self-referential definition such as "#define A A"
// cannot recurse without bound.
//   fp         - output file
//   nameX      - the identifier that was read from the source
//   LineNumber - source line number, passed on to the recursive scan
template<typename ChType, typename StrType>
void WriteNameOrReplace(FILE* fp, const StrType& IN nameX, int LineNumber)
{
	// Linked list (nodes live on the call stack) of the macros whose
	// replacement text is being expanded right now.
	struct ActiveMacro
	{
		const StrType* pName;
		const ActiveMacro* pOuter;
	};
	static const ActiveMacro* s_pActive = 0;

	for(const ActiveMacro* p = s_pActive; p != 0; p = p->pOuter)
	{
		if(*(p->pName) == nameX)
		{
			WriteToFile(fp, nameX, nameX.GetLength());
			return;
		}
	}

	StrType strCode;

	if(GetMarcoCode(nameX, strCode))
	{
		ActiveMacro frame;
		frame.pName = &nameX;
		frame.pOuter = s_pActive;
		s_pActive = &frame;
		DoOutPutLineAndReplace<ChType, StrType>(fp, strCode, strCode.GetLength(), LineNumber);	// recursive expansion
		s_pActive = frame.pOuter;
	}
	else
	{
		WriteToFile(fp, nameX, nameX.GetLength());
	}
}

//////////////////////////////////////////////////////////////////////////工作逻辑
// Returns TRUE when ch may start an identifier: an ASCII letter or '_'.
// Returns 0 otherwise.
template<typename ChType>
__inline BOOL IsLetter(ChType ch)
{
	const bool bLower = (ch >= 'a') && (ch <= 'z');
	const bool bUpper = (ch >= 'A') && (ch <= 'Z');

	return (bLower || bUpper || ch == '_') ? TRUE : 0;
}

// Returns TRUE when ch is an ASCII decimal digit, 0 otherwise.
template<typename ChType>
__inline BOOL IsDigital(ChType ch)
{
	return ('0' <= ch && ch <= '9') ? TRUE : 0;
}

// Updates the "inside a string literal" state for one character of a line.
// The source is assumed to be syntactically valid, i.e. quotes are paired.
//   ch         - the character being examined
//   bInQuote   - in/out: inside the language's main string literal
//                (double-quoted; single-quoted for Delphi)
//   bInQuoteS  - in/out: inside a single-quoted literal (non-Delphi languages)
//   bChUsed    - out: set to true when ch was consumed as a quote or escape
//   i_ReadPos  - in/out: read index; advanced by one when an escape sequence
//                inside a literal makes the next character be skipped
//   ptrReadPos - in/out: read pointer; advanced together with i_ReadPos
// A double quote that appears inside a single-quoted literal is ordinary text
// and does not change bInQuote, mirroring how a single quote inside a
// double-quoted literal is treated.
template<typename ChType>
void __ProcessLineCode_InQuoteAndOutQuote(ChType ch, BOOL& IN OUT bInQuote, BOOL& IN OUT bInQuoteS, bool& IN OUT bChUsed, int & IN OUT i_ReadPos, const ChType* & IN OUT ptrReadPos) //quote in/out state logic
{
	// Languages whose string escape character is the backslash.
	const bool bBackslashEscapes = (g_CodeType == CT_Cpp || g_CodeType == CT_Java || g_CodeType == CT_Lua || g_CodeType == CT_none);
	// Languages handled with the double-quote / single-quote rules below.
	const bool bDoubleQuoteLang = (bBackslashEscapes || g_CodeType == CT_Vb);

	if(ch == '\"' && bDoubleQuoteLang)
	{
		if(!bInQuoteS)
			bInQuote ^= 1;

		bChUsed = true;
	}
	else if(ch == '\'' && g_CodeType == CT_Delphi) //Delphi delimits strings with single quotes
	{
		bInQuote ^= 1;
		bChUsed = true;
	}
	else if(ch == '\\' && bBackslashEscapes)
	{
		//An escape only matters inside a literal; elsewhere it is ignored.
		//Delphi uses '#' and doubled quotes, and VB doubled quotes; neither
		//breaks the "even number of quotes" rule, so they need no handling.
		if(bInQuote || bInQuoteS)
		{
			i_ReadPos++;//skip the escaped character
			ptrReadPos++;
			bChUsed = true;
		}
	}
	else if(ch == '\'' && bDoubleQuoteLang)
	{
		if(!bInQuote)
			bInQuoteS ^= 1;

		bChUsed = true;
	}
}

template<typename ChType, typename strType>
void DoOutPutLineAndReplace(FILE* fp, const ChType* pString, int LineLen, int LineNumber)
{
	//输出行 并且带有宏替换
	//不带引号,字母数字构成的连续字符串都可以替换
	int bInQuote = 0; //双引号中
	int bInQuoteS = 0; //单引号中
	int letterStart = 0;
	BOOL bInNameGet = FALSE;

	for(int i = 0; i < LineLen; i++)
	{
		bool bChUsed = false; //字符被处理了
		ChType ch = pString[i];

		if(bInNameGet && !IsLetter(ch) && !IsDigital(ch))
		{
			bInNameGet = FALSE;
			//Got a name
			strType nameX(pString + letterStart, i - letterStart);
			WriteNameOrReplace<ChType, strType>(fp, nameX, LineNumber);
		}

		const ChType* pJump = 0;
		__ProcessLineCode_InQuoteAndOutQuote(ch, bInQuote, bInQuoteS, bChUsed, i, pJump);

		if(bChUsed)
		{
			WriteChar(fp, ch);

			if(pJump == (ChType*)1) //跳过了一个字符
			{
				WriteChar(fp, pString[i]);
			}

			continue;
		}

		if(bInQuote || bInQuoteS) //字符串没完事
		{
			WriteChar(fp, ch);

			if(pJump == (ChType*)1) //跳过了一个字符
			{
				WriteChar(fp, pString[i]);
			}

			continue;
		}

		if(IsLetter(ch) && !bInNameGet)
		{
			bInNameGet = TRUE;
			letterStart = i; //
		}

		if(!bInNameGet)
			WriteChar(fp, ch);
	}

	//remain
	if(bInNameGet && LineLen > letterStart)
	{
		//Got a name
		strType nameX(pString + letterStart, LineLen - letterStart);
		WriteNameOrReplace<ChType, strType>(fp, nameX, LineNumber);
	}
}

template<typename ChType, typename strType>
strType EraseRemark(const ChType* lpCode)
{
	//不在字符串就可识别结尾
	static ChType Str_Lua[2] = {'-', '-'};
	static ChType Str_Cpp[2] = {'/', '/'};
	static ChType Str_VB[1] = {'\''};
	ChType* pMark = Str_Lua;
	int MarkLen = _countof(Str_Lua);

	if(g_CodeType == CT_Lua)
	{
		pMark = Str_Lua;
		MarkLen = _countof(Str_Lua);
	}
	else if(g_CodeType == CT_Cpp || g_CodeType == CT_Delphi || g_CodeType == CT_Java)
	{
		pMark = Str_Cpp;
		MarkLen = _countof(Str_Cpp);
	}
	else if(g_CodeType == CT_Vb)
	{
		pMark = Str_VB;
		MarkLen = _countof(Str_VB);
	}

	int bInQuote = 0;
	int bInQuoteS = 0;
	const ChType* lpStart = lpCode;

	while(true)
	{
		ChType ch = (*lpCode);

		if(ch == 0)
			break;

		if(!bInQuoteS && !bInQuote)
		{
			//此处不在引号中
			if(StrMatch<ChType>(lpCode, MarkLen, pMark))
			{
				return strType(lpStart, lpCode - lpStart);
			}
		}

		int i_NoUsed = 0;
		bool bChUsed = false;
		__ProcessLineCode_InQuoteAndOutQuote(ch, bInQuote, bInQuoteS, bChUsed, i_NoUsed, lpCode);
		lpCode++;
	}

	return lpStart;
}

template<typename ChType, typename strType>
void ProcessOneLine(FILE* fpOut, ChType* pString, int LineLen, int LineNumber)
{
	if(LineLen == 0)
	{
		static ChType RN[2] = {'\r', '\n'};
		WriteToFile(fpOut, RN, 2);
		return;
	}

	if(pString[0] == '#') //预处理命令
	{
		static ChType Str_EMPTY[1] = {0};
		static ChType Str_Define[7] = {'#', 'd', 'e', 'f', 'i', 'n', 'e',};
		static ChType Str_IfDef[6] = {'#', 'i', 'f', 'd', 'e', 'f',};
		static ChType Str_IfNdef[7] = {'#', 'i', 'f', 'n', 'd', 'e', 'f',};
		static ChType Str_Else[5] = {'#', 'e', 'l', 's', 'e',};
		static ChType Str_endif[6] = {'#', 'e', 'n', 'd', 'i', 'f'};
		static ChType Str_undef[6] = {'#', 'u', 'n', 'd', 'e', 'f'};

		//遇到预处理时,去掉注释内容(lua:两个减号的行尾注释,C++:两个除号的行尾注释,VB:单引号行尾注释,根据文件类型来搞,没有文件类型就当作lua)
		if(LineLen > 7 && StrMatch(pString, 7, Str_Define)) //#define MACRO CODE
		{
			if(CurrentConditionEnable())//当前允许输出代码时Define Macro才有效
			{
				strType str(pString + 7, LineLen - 7);
				str.TrimLeft();
				int fk = str.Find((ChType)' ');

				if(fk > 0)
				{
					strType strName = str.Left(fk);
					strName.Trim();
					strType strCode = str.Mid(fk + 1);
					strType strCodeNoRemark = EraseRemark<ChType, strType>(strCode);
					strCodeNoRemark.TrimRight();
					AddMacro(strName, strCodeNoRemark);
				}
				else//#define MACRO
				{
					strType strMacroNoRemark = EraseRemark<ChType, strType>(str);
					strMacroNoRemark.Trim();
					AddMacro(strMacroNoRemark, Str_EMPTY);
				}
			}
		}
		else if (LineLen>6 && StrMatch(pString,6,Str_undef))//#undef MACRO
		{
			if(CurrentConditionEnable())//当前允许输出代码时才有效
			{
				strType str(pString + 6, LineLen - 6);
				strType strNoRemark = EraseRemark<ChType, strType>(str);
				strNoRemark.Trim();
				DeleteMacro(strNoRemark);
			}
		}
		else if(LineLen > 6 && StrMatch(pString, 6, Str_IfDef)) //#ifdef MACRO
		{
			if(CurrentConditionEnable())//当前允许输出代码时才有效
			{
				strType str(pString + 6, LineLen - 6);
				strType strNoRemark = EraseRemark<ChType, strType>(str);
				strNoRemark.Trim();

				if(EixstMacro(strNoRemark))
				{
					PushConditionEnableCode(TRUE);
				}
				else
				{
					PushConditionEnableCode(FALSE);
				}
			}
		}
		else if(LineLen > 7 && StrMatch(pString, 7, Str_IfNdef)) //#ifndef MACRO
		{
			if(CurrentConditionEnable())//当前允许输出代码时才有效
			{
				strType str(pString + 7, LineLen - 7);
				strType strNoRemark = EraseRemark<ChType, strType>(str);
				strNoRemark.Trim();

				if(!EixstMacro(strNoRemark))
				{
					PushConditionEnableCode(TRUE);
				}
				else
				{
					PushConditionEnableCode(FALSE);
				}
			}
		}
		else if(LineLen >= 5 && StrMatch(pString, 5, Str_Else)) //#else
		{
			InvCurrentCondition();
		}
		else if(LineLen >= 6 && StrMatch(pString, 6, Str_endif)) //#endif
		{
			PopCondition();
		}
		else
		{
			//error
			OutConsoleErrPreCompile(LineNumber, pString, LineLen);
		}
	}
	else
	{
		if(CurrentConditionEnable())
		{
			DoOutPutLineAndReplace<ChType, strType>(fpOut, pString, LineLen, LineNumber);
		}
	}
}

// Splits the NUL-terminated text at pString into lines and feeds every
// non-empty line to ProcessOneLine. A line ends at '\n' (which stays part of
// the line) or at the terminating NUL (which does not). Line numbers start
// at 1.
template<typename ChType, typename strType>
void DoPreCompileCodeT(FILE* fpOut, ChType* pString)
{
	ChType* pLineStart = pString;
	ChType* pCursor = pString;
	int lineNum = 1;
	bool bAtEnd = false;

	do
	{
		const ChType ch = *pCursor++;
		bAtEnd = (ch == 0);

		if(!bAtEnd && ch != '\n')
			continue;	//still inside the current line

		//pCursor is one past the character that ended the line; the
		//terminating NUL is not counted as part of the line.
		int LineLen = static_cast<int>(pCursor - pLineStart);

		if(bAtEnd)
			--LineLen;

		if(LineLen > 0)
			ProcessOneLine<ChType, strType>(fpOut, pLineStart, LineLen, lineNum);

		++lineNum;
		pLineStart = pCursor;
	}
	while(!bAtEnd);
}

// Preprocesses the file strFileIn and writes the result to strFileOut.
// The whole input is read into memory. Its encoding is either the one forced
// on the command line (g_ForceFCPType) or detected from the byte-order mark:
// a UTF-16 BOM selects wide-character processing; otherwise the file is
// processed as bytes (UTF-8 when a UTF-8 BOM is present, multibyte if not).
// A BOM that was recognised and skipped is written to the output first.
// Failures (open, size query, allocation, read) are reported on the console.
void DoPreCompileCode(CString strFileIn, CString strFileOut)
{
	FILE* fpInput = _wfopen(strFileIn, L"rb");

	if(!fpInput)
	{
		//A CString must be cast explicitly when passed through "...".
		wprintf(L"cannot open input file %s\n", static_cast<LPCWSTR>(strFileIn));
		return;
	}

	//Determine the file size; ftell reports -1 on failure, which must not be
	//used as a length.
	long flen = -1;

	if(fseek(fpInput, 0, SEEK_END) == 0)
		flen = ftell(fpInput);

	if(flen < 0 || fseek(fpInput, 0, SEEK_SET) != 0)
	{
		wprintf(L"cannot read input file %s\n", static_cast<LPCWSTR>(strFileIn));
		fclose(fpInput);
		return;
	}

	//Ten extra zero bytes guarantee a terminator for either character width.
	BYTE* pBuf = new(nothrow) BYTE[flen + 10];

	if(!pBuf)
	{
		printf("cannot alloc buf\n");
		fclose(fpInput);
		return;
	}

	memset(pBuf, 0, flen + 10);
	const size_t cbRead = fread(pBuf, 1, flen, fpInput);
	fclose(fpInput);

	if(cbRead != static_cast<size_t>(flen))
	{
		wprintf(L"cannot read input file %s\n", static_cast<LPCWSTR>(strFileIn));
		delete[] pBuf;
		return;
	}

	FILE* fpOut = _wfopen(strFileOut, L"wb+");

	if(!fpOut)
	{
		wprintf(L"cannot open output file %s\n", static_cast<LPCWSTR>(strFileOut));
		delete[] pBuf;
		return;
	}

	const WORD bom = 0xFEFF;
	//EF BB BF is the UTF-8 BOM; it has to be skipped, otherwise a directive on
	//the first line would not start with '#'.
	const BYTE utf8Bom[3] = {0xEF, 0xBB, 0xBF};
	int nHasUnicodeBOM = 0;	//length of the UTF-16 BOM found, 0 if none
	int nHasUtf8BOM = 0;	//length of the UTF-8 BOM found, 0 if none

	if(flen >= 2 && (*((WORD*)pBuf)) == bom)
		nHasUnicodeBOM = 2;

	if(flen >= 3 && pBuf[0] == utf8Bom[0] && pBuf[1] == utf8Bom[1] && pBuf[2] == utf8Bom[2])
		nHasUtf8BOM = 3;

	BOOL bWideCharOrMultyChar = TRUE; //TRUE = WCHAR, FALSE = char

	if(g_ForceFCPType == FCP_none) //no encoding forced: decide from the BOM
	{
		if(nHasUnicodeBOM)
		{
			g_FCPType = FCP_Unicode;
			bWideCharOrMultyChar = TRUE;
		}
		else
		{
			g_FCPType = nHasUtf8BOM ? FCP_Utf8 : FCP_Multibyte;
			bWideCharOrMultyChar = FALSE;
		}
	}
	else //encoding forced on the command line
	{
		g_FCPType = g_ForceFCPType;

		if(g_ForceFCPType == FCP_Unicode)
		{
			bWideCharOrMultyChar = TRUE;
		}
		else if(g_ForceFCPType == FCP_Multibyte || g_ForceFCPType == FCP_Utf8)
		{
			bWideCharOrMultyChar = FALSE;

			if(g_ForceFCPType != FCP_Utf8) //a multibyte file has no BOM to skip
				nHasUtf8BOM = 0;
		}
	}

	if(bWideCharOrMultyChar)
	{
		WCHAR* pStrUnicode = (WCHAR*)(pBuf + nHasUnicodeBOM);

		if(nHasUnicodeBOM)
			fwrite(&bom, 1, sizeof(bom), fpOut);

		DoPreCompileCodeT<WCHAR, CStringW>(fpOut, pStrUnicode);
	}
	else
	{
		char* pStrUtf8OrAnsi = (char*)(pBuf + nHasUtf8BOM);

		if(nHasUtf8BOM)
			fwrite(&utf8Bom[0], 1, sizeof(utf8Bom), fpOut);

		DoPreCompileCodeT<char, CStringA>(fpOut, pStrUtf8OrAnsi);
	}

	fclose(fpOut);
	delete[] pBuf;
}

// Applies one command-line option (matched case-insensitively):
//   /def:NAME               - predefines the macro NAME with empty replacement text
//   /cp:ansi|utf8|unicode   - forces the encoding of the input file
// Anything else is ignored.
void parse_option(CString strOption)
{
	if(strOption.GetLength() > 5 && strOption.Left(5).CompareNoCase(L"/def:") == 0)
	{
		//predefine a macro
		CString strMacroName = strOption.Mid(5);
		AddMacro(strMacroName, L"");
	}
	else if(strOption.GetLength() > 4 && strOption.Left(4).CompareNoCase(L"/cp:") == 0)
	{
		//select the code page
		CString strCP = strOption.Mid(4);

		if(strCP.CompareNoCase(L"ansi") == 0)
		{
			g_ForceFCPType = FCP_Multibyte;
		}
		else if(strCP.CompareNoCase(L"utf8") == 0)
		{
			g_ForceFCPType = FCP_Utf8;
		}
		else if(strCP.CompareNoCase(L"unicode") == 0)
		{
			g_ForceFCPType = FCP_Unicode;
		}
		else
		{
			//A CString must be cast explicitly when passed through "...".
			wprintf(L"unknown code page setting %s, Use defalut\n", static_cast<LPCWSTR>(strCP));
		}
	}
}

// Sets g_CodeType from the extension of strFileInput (compared without regard
// to case). The file name must be longer than the extension itself. Unknown
// extensions are treated as Lua.
void GetCodeType(CString strFileInput)
{
	struct ExtRule
	{
		LPCWSTR ext;	//extension including the dot
		int extLen;	//number of characters in ext
		CodeType type;
	};
	static const ExtRule rules[] =
	{
		{ L".lua",  4, CT_Lua    },
		{ L".cpp",  4, CT_Cpp    },
		{ L".c",    2, CT_Cpp    },
		{ L".js",   3, CT_Java   },
		{ L".java", 5, CT_Java   },
		{ L".vbs",  4, CT_Vb     },
		{ L".bas",  4, CT_Vb     },
		{ L".pas",  4, CT_Delphi },
	};
	const int ruleCount = static_cast<int>(sizeof(rules) / sizeof(rules[0]));
	const int nameLen = strFileInput.GetLength();
	CodeType detected = CT_Lua;	//fallback when no rule matches

	for(int k = 0; k < ruleCount; ++k)
	{
		const ExtRule& rule = rules[k];

		if(nameLen > rule.extLen && strFileInput.Right(rule.extLen).CompareNoCase(rule.ext) == 0)
		{
			detected = rule.type;
			break;
		}
	}

	g_CodeType = detected;
}

// Entry point. Usage: app x.code out.code "/def:Macro" ...
// argv[1] is the input file, argv[2] the output file, every further argument
// an option handled by parse_option. With fewer than two file arguments the
// usage text is printed. The return value is always 0.
int _tmain(int argc, _TCHAR* argv[])
{
	if(argc < 3)
	{
		printf("no input file or out file ,usage : app x.code out.code \"/def:Macro\" ... \n");
		printf("    options: \n");
		printf("    (1)\"/def:MACRO\" = pre-Define a Macro Named MACRO\n");
		printf("    (2)\"/cp:ansi/utf8/unicode\" = Set Code\'s CodePage\n");
		return 0;
	}

	CString strFileInput = argv[1];
	GetCodeType(strFileInput);
	CString strFileOutput = argv[2];

	//Everything after the two file names is an option.
	int optIndex = 3;

	while(optIndex < argc)
	{
		parse_option(argv[optIndex]);
		++optIndex;
	}

	DoPreCompileCode(strFileInput, strFileOutput);
	return 0;
}


// 猜你喜欢
//
// 转载自 blog.csdn.net/lif12345/article/details/79282697