#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <cctype>
#include <regex>
using namespace std;
// Token: one lexical unit, described by a category tag and its text.
struct Token {
    // Categories a token can belong to.
    enum Type {
        KEYWORD,
        IDENTIFIER,
        INTEGER,
        FLOAT,
        CHAR_LITERAL,
        STRING_LITERAL,
        OPERATOR,
        DELIMITER,
        UNKNOWN
    };

    Type type;          // category of this token
    std::string value;  // text of this token

    // Builds a token of category `t` that stores a copy of `v`.
    Token(Type t, const std::string& v)
        : type(t),
          value(v) {
    }
};
// Keyword table: each reserved word known to the lexer, mapped to
// Token::KEYWORD.
std::unordered_map<std::string, Token::Type> keywords = {
    // type names
    {"int",    Token::KEYWORD},
    {"double", Token::KEYWORD},
    {"char",   Token::KEYWORD},
    {"string", Token::KEYWORD},
    // control flow
    {"if",     Token::KEYWORD},
    {"else",   Token::KEYWORD},
    {"while",  Token::KEYWORD},
    {"for",    Token::KEYWORD},
    {"return", Token::KEYWORD},
    // remaining reserved words
    {"void",   Token::KEYWORD},
    {"true",   Token::KEYWORD},
    {"false",  Token::KEYWORD}
};
// Delimiter table: single-character punctuation mapped to Token::DELIMITER.
//
// The key type is `char`, so only one-character delimiters can be listed.
// A two-character sequence such as `->` must not be written here as '->':
// that is a multi-character literal of type int with an implementation-defined
// value, and it would be truncated to some unrelated single-character key.
std::unordered_map<char, Token::Type> delimiters = {
    {'(', Token::DELIMITER}, {')', Token::DELIMITER},
    {'{', Token::DELIMITER}, {'}', Token::DELIMITER},
    {'[', Token::DELIMITER}, {']', Token::DELIMITER},
    {',', Token::DELIMITER}, {'.', Token::DELIMITER},
    {';', Token::DELIMITER}, {':', Token::DELIMITER},
    {'?', Token::DELIMITER}
};
// Operator table: every one- and two-character operator spelling known to the
// lexer, mapped to Token::OPERATOR.
std::unordered_map<std::string, Token::Type> operators = {
    // arithmetic
    {"+",  Token::OPERATOR},
    {"-",  Token::OPERATOR},
    {"*",  Token::OPERATOR},
    {"/",  Token::OPERATOR},
    {"%",  Token::OPERATOR},
    // assignment
    {"=",  Token::OPERATOR},
    // equality and relational
    {"==", Token::OPERATOR},
    {"!=", Token::OPERATOR},
    {"<",  Token::OPERATOR},
    {">",  Token::OPERATOR},
    {"<=", Token::OPERATOR},
    {">=", Token::OPERATOR},
    // logical
    {"&&", Token::OPERATOR},
    {"||", Token::OPERATOR},
    {"!",  Token::OPERATOR},
    // increment / decrement
    {"++", Token::OPERATOR},
    {"--", Token::OPERATOR}
};
// Helper: skip whitespace.
//
// Advances `position` past the run of whitespace characters (as classified by
// std::isspace) that starts at `position` in `input`. Leaves `position`
// unchanged if it is already at or beyond the end of `input`, or if the
// character there is not whitespace.
void skipWhitespace(const std::string& input, std::size_t& position) {
    // The <cctype> classifiers have undefined behaviour for negative arguments
    // other than EOF, and plain char may be signed, so bytes >= 0x80 (e.g.
    // UTF-8 text) must be converted to unsigned char first.
    while (position < input.size() &&
           std::isspace(static_cast<unsigned char>(input[position]))) {
        ++position;
    }
}
// Helper: recognise an identifier or a keyword.
//
// Consumes the longest run of alphanumeric characters and underscores that
// starts at `position` and advances `position` past it. Returns a KEYWORD
// token if the consumed text is a key of the `keywords` table, otherwise an
// IDENTIFIER token; the token's value is the consumed text.
Token recognizeIdentifierOrKeyword(const std::string& input, std::size_t& position) {
    const std::size_t start = position;
    while (position < input.size()) {
        // Convert to unsigned char before calling the <cctype> classifier:
        // passing a negative char value (any byte >= 0x80 where char is
        // signed) is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(input[position]);
        if (!std::isalnum(ch) && ch != '_') {
            break;
        }
        ++position;
    }

    const std::string lexeme = input.substr(start, position - start);
    const bool isKeyword = keywords.find(lexeme) != keywords.end();
    return Token(isKeyword ? Token::KEYWORD : Token::IDENTIFIER, lexeme);
}
// Helper: recognise an integer or floating-point literal.
//
// Starting at `position` (expected to be the first digit), consumes:
//   * a run of decimal digits,
//   * optionally a '.' followed by zero or more digits,
//   * optionally an exponent: 'e' or 'E', an optional sign, and at least one
//     digit.
// Advances `position` past the consumed text. Returns a FLOAT token if a
// fraction or exponent was consumed, otherwise an INTEGER token.
//
// An 'e'/'E' that is not followed by (an optional sign and) a digit is NOT
// consumed, so input such as "1e" or "1e+" yields the INTEGER "1" and leaves
// the rest for the caller instead of producing a malformed FLOAT.
Token recognizeNumber(const std::string& input, std::size_t& position) {
    const std::size_t size = input.size();
    // Bounds-checked digit test. The cast avoids undefined behaviour in
    // std::isdigit for negative char values.
    const auto isDigitAt = [&input, size](std::size_t index) {
        return index < size &&
               std::isdigit(static_cast<unsigned char>(input[index])) != 0;
    };

    const std::size_t start = position;
    bool isFloat = false;

    // Integer part.
    while (isDigitAt(position)) {
        ++position;
    }

    // Optional fraction.
    if (position < size && input[position] == '.') {
        ++position;
        isFloat = true;
        while (isDigitAt(position)) {
            ++position;
        }
    }

    // Optional exponent: look ahead first and commit only if digits follow.
    if (position < size && (input[position] == 'e' || input[position] == 'E')) {
        std::size_t probe = position + 1;
        if (probe < size && (input[probe] == '+' || input[probe] == '-')) {
            ++probe;
        }
        if (isDigitAt(probe)) {
            position = probe;
            while (isDigitAt(position)) {
                ++position;
            }
            isFloat = true;
        }
    }

    const std::string lexeme = input.substr(start, position - start);
    return Token(isFloat ? Token::FLOAT : Token::INTEGER, lexeme);
}
// Helper: recognise a character literal.
//
// `position` is expected to index the opening single quote. Consumes text up
// to and including the closing single quote and advances `position` past it.
// A backslash escapes the character after it, so an escaped quote (\') does
// not terminate the literal. If the input ends before a closing quote is
// found, everything up to the end of input is consumed and `position` is left
// exactly at input.size() (never beyond it).
// Returns a CHAR_LITERAL token whose value includes the quotes.
Token recognizeCharLiteral(const std::string& input, std::size_t& position) {
    const std::size_t size = input.size();
    const std::size_t start = position;

    ++position;  // step over the opening quote
    while (position < size && input[position] != '\'') {
        if (input[position] == '\\' && position + 1 < size) {
            ++position;  // step over the backslash; the escaped char is skipped below
        }
        ++position;
    }
    if (position < size) {
        ++position;  // step over the closing quote, if there is one
    }

    return Token(Token::CHAR_LITERAL, input.substr(start, position - start));
}
// Helper: recognise a string literal.
//
// `position` is expected to index the opening double quote. Consumes text up
// to and including the closing double quote and advances `position` past it.
// A backslash escapes the character after it, so an escaped quote (\") does
// not terminate the literal. If the input ends before a closing quote is
// found (including when the last character is a lone backslash), everything
// up to the end of input is consumed and `position` is left exactly at
// input.size() (never beyond it).
// Returns a STRING_LITERAL token whose value includes the quotes.
Token recognizeStringLiteral(const std::string& input, std::size_t& position) {
    const std::size_t size = input.size();
    const std::size_t start = position;

    ++position;  // step over the opening quote
    while (position < size && input[position] != '"') {
        if (input[position] == '\\' && position + 1 < size) {
            ++position;  // step over the backslash; the escaped char is skipped below
        }
        ++position;
    }
    if (position < size) {
        ++position;  // step over the closing quote, if there is one
    }

    return Token(Token::STRING_LITERAL, input.substr(start, position - start));
}
// Helper: recognise an operator.
//
// Starting with the single character at `position`, keeps extending the
// candidate by one character for as long as the extended text is a key of the
// `operators` table, so the longest listed spelling wins ("==" rather than
// "=" followed by "="). Advances `position` past the consumed text.
//
// Returns a token carrying the type stored in `operators` for the consumed
// text. If the consumed text is not in the table, an UNKNOWN token is
// returned; the table is only read, never modified.
Token recognizeOperator(const std::string& input, std::size_t& position) {
    const std::size_t start = position;
    std::size_t length = 1;
    while (start + length < input.size() &&
           operators.find(input.substr(start, length + 1)) != operators.end()) {
        ++length;
    }

    const std::string lexeme = input.substr(start, length);
    // Advance by what was actually extracted so `position` cannot run past the
    // end of `input`.
    position = start + lexeme.size();

    // Look up with find(): operator[] would insert a value-initialised entry
    // (enum value 0, i.e. KEYWORD) for a missing key and mutate the table.
    const auto entry = operators.find(lexeme);
    if (entry == operators.end()) {
        return Token(Token::UNKNOWN, lexeme);
    }
    return Token(entry->second, lexeme);
}
// Helper: recognise a delimiter.
//
// Consumes the single character at `position` (the caller must ensure
// `position < input.size()`) and advances `position` by one. Returns a token
// carrying the type stored in `delimiters` for that character, or an UNKNOWN
// token if the character is not in the table.
Token recognizeDelimiter(const std::string& input, std::size_t& position) {
    const char ch = input[position++];

    // find() instead of operator[]: operator[] would insert a value-initialised
    // entry (enum value 0, i.e. KEYWORD) into the table for a missing key.
    const auto entry = delimiters.find(ch);
    const Token::Type kind =
        (entry != delimiters.end()) ? entry->second : Token::UNKNOWN;
    return Token(kind, std::string(1, ch));
}
// Main routine: the lexical analyser.
//
// Splits `input` into tokens and returns them in source order. Whitespace
// between tokens is skipped. At each token start the first character selects
// the recogniser, tried in this order:
//   1. letter or '_'            -> identifier / keyword
//   2. digit                    -> integer / floating-point literal
//   3. single quote             -> character literal
//   4. double quote             -> string literal
//   5. start of an operator     -> operator
//   6. key of `delimiters`      -> delimiter
//   7. anything else            -> one-character UNKNOWN token
//
// A character starts an operator if it is itself a key of `operators`, or if
// it and the following character together form a key. The second case is what
// allows "&&" and "||" to be recognised: '&' and '|' are not operators on
// their own, so testing only the single character would reject them.
std::vector<Token> lexicalAnalyzer(const std::string& input) {
    std::vector<Token> tokens;
    const std::size_t size = input.size();
    std::size_t position = 0;

    // True when an operator listed in `operators` begins at `position`.
    const auto startsOperator = [&input, &position, size]() {
        if (operators.find(std::string(1, input[position])) != operators.end()) {
            return true;
        }
        return position + 1 < size &&
               operators.find(input.substr(position, 2)) != operators.end();
    };

    while (position < size) {
        skipWhitespace(input, position);
        if (position >= size) {
            break;
        }

        const char currentChar = input[position];
        // <cctype> classifiers require a value representable as unsigned char;
        // a negative char (byte >= 0x80 where char is signed) is undefined
        // behaviour.
        const unsigned char classified = static_cast<unsigned char>(currentChar);

        if (std::isalpha(classified) || currentChar == '_') {
            tokens.push_back(recognizeIdentifierOrKeyword(input, position));
        } else if (std::isdigit(classified)) {
            tokens.push_back(recognizeNumber(input, position));
        } else if (currentChar == '\'') {
            tokens.push_back(recognizeCharLiteral(input, position));
        } else if (currentChar == '"') {
            tokens.push_back(recognizeStringLiteral(input, position));
        } else if (startsOperator()) {
            tokens.push_back(recognizeOperator(input, position));
        } else if (delimiters.find(currentChar) != delimiters.end()) {
            tokens.push_back(recognizeDelimiter(input, position));
        } else {
            tokens.push_back(Token(Token::UNKNOWN, std::string(1, currentChar)));
            ++position;
        }
    }
    return tokens;
}
// 打印Token
void printTokens(const vector<Token>& tokens) {
for (const auto& token : tokens) {
cout << "Token Type: ";
switch (token.type) {
case Token::KEYWORD: cout << "KEYWORD"; break;
case Token::IDENTIFIER: cout << "IDENTIFIER"; break;
case Token::INTEGER: cout << "INTEGER"; break;
case Token::FLOAT: cout << "FLOAT"; break;
case Token::CHAR_LITERAL: cout << "CHAR_LITERAL"; break;
case Token::STRING_LITERAL: cout << "STRING_LITERAL"; break;
case Token::OPERATOR: cout << "OPERATOR"; break;
case Token::DELIMITER: cout << "DELIMITER"; break;
case Token::UNKNOWN: cout << "UNKNOWN"; break;
}
cout << ", Value: " << token.value << endl;
}
}
// Program entry point.
//
// Prompts the user, reads source text from standard input line by line until
// a line consisting solely of `end` is read or the input ends, then tokenises
// the collected text and prints the tokens.
int main() {
    std::string sourceCode;
    std::cout << "请输入C++代码(输入`end`结束):\n";

    std::string line;
    // Loop on the result of getline: if the input ends (EOF or a stream
    // error) before an `end` line appears, reading must stop. Ignoring the
    // stream state would spin forever appending empty lines.
    while (std::getline(std::cin, line)) {
        if (line == "end") {
            break;
        }
        sourceCode += line;
        sourceCode += '\n';
    }

    const std::vector<Token> tokens = lexicalAnalyzer(sourceCode);
    printTokens(tokens);
    return 0;
}
