#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <cctype>

using namespace std;

// Token definition
struct Token {
    enum Type {
        KEYWORD, IDENTIFIER, INTEGER, FLOAT, CHAR_LITERAL, STRING_LITERAL,
        OPERATOR, DELIMITER, UNKNOWN
    };

    Type type;    // kind of token
    string value; // lexeme text

    Token(Type t, const string& v) : type(t), value(v) {}
};

// Keyword lookup table
unordered_map<string, Token::Type> keywords = {
    {"int", Token::KEYWORD}, {"double", Token::KEYWORD}, {"char", Token::KEYWORD}, {"string", Token::KEYWORD},
    {"if", Token::KEYWORD}, {"else", Token::KEYWORD}, {"while", Token::KEYWORD}, {"for", Token::KEYWORD},
    {"return", Token::KEYWORD}, {"void", Token::KEYWORD}, {"true", Token::KEYWORD}, {"false", Token::KEYWORD}
};

// Delimiter lookup table (single characters only; the two-character "->"
// belongs in the operator table below, since a char cannot key it here)
unordered_map<char, Token::Type> delimiters = {
    {'(', Token::DELIMITER}, {')', Token::DELIMITER}, {'{', Token::DELIMITER}, {'}', Token::DELIMITER},
    {'[', Token::DELIMITER}, {']', Token::DELIMITER}, {',', Token::DELIMITER}, {'.', Token::DELIMITER},
    {';', Token::DELIMITER}, {':', Token::DELIMITER}, {'?', Token::DELIMITER}
};

// Operator lookup table. The single-character "&" and "|" (bitwise AND/OR)
// must be present: the scanner only enters recognizeOperator() when the
// first character is itself a key here, so "&&" and "||" would otherwise
// never be matched.
unordered_map<string, Token::Type> operators = {
    {"+", Token::OPERATOR}, {"-", Token::OPERATOR}, {"*", Token::OPERATOR}, {"/", Token::OPERATOR},
    {"%", Token::OPERATOR}, {"=", Token::OPERATOR}, {"==", Token::OPERATOR}, {"!=", Token::OPERATOR},
    {"<", Token::OPERATOR}, {">", Token::OPERATOR}, {"<=", Token::OPERATOR}, {">=", Token::OPERATOR},
    {"&", Token::OPERATOR}, {"|", Token::OPERATOR}, {"&&", Token::OPERATOR}, {"||", Token::OPERATOR},
    {"!", Token::OPERATOR}, {"++", Token::OPERATOR}, {"--", Token::OPERATOR}, {"->", Token::OPERATOR}
};

// Helper: skip whitespace (cast to unsigned char keeps isspace() well-defined)
void skipWhitespace(const string& input, size_t& position) {
    while (position < input.size() && isspace(static_cast<unsigned char>(input[position]))) {
        position++;
    }
}

// Helper: scan an identifier or keyword
Token recognizeIdentifierOrKeyword(const string& input, size_t& position) {
    size_t start = position;
    while (position < input.size() &&
           (isalnum(static_cast<unsigned char>(input[position])) || input[position] == '_')) {
        position++;
    }
    string token = input.substr(start, position - start);
    if (keywords.find(token) != keywords.end()) {
        return Token(Token::KEYWORD, token);
    }
    return Token(Token::IDENTIFIER, token);
}
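// For example, "while" is found in the keyword table and becomes KEYWORD,
// whereas "_count1" falls through the lookup and becomes IDENTIFIER.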

// Helper: scan an integer or floating-point number
Token recognizeNumber(const string& input, size_t& position) {
    size_t start = position;
    bool isFloat = false;

    // integer part
    while (position < input.size() && isdigit(static_cast<unsigned char>(input[position]))) {
        position++;
    }

    // optional fractional part
    if (position < input.size() && input[position] == '.') {
        position++;
        isFloat = true;
        while (position < input.size() && isdigit(static_cast<unsigned char>(input[position]))) {
            position++;
        }
    }

    // optional exponent; only consumed if at least one digit follows,
    // so "123easy" lexes as the integer 123 followed by an identifier
    if (position < input.size() && (input[position] == 'e' || input[position] == 'E')) {
        size_t expStart = position;
        position++;
        if (position < input.size() && (input[position] == '+' || input[position] == '-')) {
            position++;
        }
        if (position < input.size() && isdigit(static_cast<unsigned char>(input[position]))) {
            while (position < input.size() && isdigit(static_cast<unsigned char>(input[position]))) {
                position++;
            }
            isFloat = true;
        } else {
            position = expStart; // not an exponent after all; back up
        }
    }

    string token = input.substr(start, position - start);
    return isFloat ? Token(Token::FLOAT, token) : Token(Token::INTEGER, token);
}
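// Examples: "42" -> INTEGER, "3.14" -> FLOAT, "2.5e-3" -> FLOAT, "10E+2" -> FLOAT.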

// Helper: scan a character literal
Token recognizeCharLiteral(const string& input, size_t& position) {
    size_t start = position;
    position++; // skip the opening single quote
    while (position < input.size() && input[position] != '\'') {
        if (input[position] == '\\') {
            position++; // skip the escaped character, e.g. the quote in '\''
        }
        position++;
    }
    if (position < input.size()) {
        position++; // skip the closing single quote
    }
    string token = input.substr(start, position - start);
    return Token(Token::CHAR_LITERAL, token);
}
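// Examples: 'A' and '\n' each become one CHAR_LITERAL token, quotes included.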

// Helper: scan a string literal
Token recognizeStringLiteral(const string& input, size_t& position) {
    size_t start = position;
    position++; // skip the opening double quote
    while (position < input.size() && input[position] != '"') {
        if (input[position] == '\\') {
            position++; // skip the escaped character
        }
        position++;
    }
    if (position < input.size()) {
        position++; // skip the closing double quote
    }
    string token = input.substr(start, position - start);
    return Token(Token::STRING_LITERAL, token);
}
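// Example: "He said \"hi\"" is consumed as a single STRING_LITERAL token;
// the escaped inner quotes do not terminate the scan.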

// Helper: scan an operator (maximal munch: prefer the longest match)
Token recognizeOperator(const string& input, size_t& position) {
    size_t start = position;
    size_t length = 1;

    // grow the match while the longer substring is still a known operator
    while (position + length < input.size() &&
           operators.find(input.substr(position, length + 1)) != operators.end()) {
        length++;
    }

    string token = input.substr(start, length);
    position += length;
    return Token(operators[token], token);
}
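// For example, on "a<=b" the scan starting at '<' extends to "<=" because
// "<=" is in the table, producing one OPERATOR token instead of '<' then '='.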

// Helper: scan a delimiter
Token recognizeDelimiter(const string& input, size_t& position) {
    char c = input[position++];
    return Token(delimiters[c], string(1, c));
}

// The lexical analyzer itself: dispatch on the first character of each token
vector<Token> lexicalAnalyzer(const string& input) {
    vector<Token> tokens;
    size_t position = 0;

    while (position < input.size()) {
        skipWhitespace(input, position);

        if (position >= input.size()) {
            break;
        }

        char currentChar = input[position];

        if (isalpha(static_cast<unsigned char>(currentChar)) || currentChar == '_') {
            tokens.push_back(recognizeIdentifierOrKeyword(input, position));
        } else if (isdigit(static_cast<unsigned char>(currentChar))) {
            tokens.push_back(recognizeNumber(input, position));
        } else if (currentChar == '\'') {
            tokens.push_back(recognizeCharLiteral(input, position));
        } else if (currentChar == '"') {
            tokens.push_back(recognizeStringLiteral(input, position));
        } else if (operators.find(string(1, currentChar)) != operators.end()) {
            tokens.push_back(recognizeOperator(input, position));
        } else if (delimiters.find(currentChar) != delimiters.end()) {
            tokens.push_back(recognizeDelimiter(input, position));
        } else {
            tokens.push_back(Token(Token::UNKNOWN, string(1, currentChar)));
            position++;
        }
    }

    return tokens;
}
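// For example, lexicalAnalyzer("int x = 1;") yields
// KEYWORD "int", IDENTIFIER "x", OPERATOR "=", INTEGER "1", DELIMITER ";".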

// Print the token stream
void printTokens(const vector<Token>& tokens) {
    for (const auto& token : tokens) {
        cout << "Token Type: ";
        switch (token.type) {
            case Token::KEYWORD: cout << "KEYWORD"; break;
            case Token::IDENTIFIER: cout << "IDENTIFIER"; break;
            case Token::INTEGER: cout << "INTEGER"; break;
            case Token::FLOAT: cout << "FLOAT"; break;
            case Token::CHAR_LITERAL: cout << "CHAR_LITERAL"; break;
            case Token::STRING_LITERAL: cout << "STRING_LITERAL"; break;
            case Token::OPERATOR: cout << "OPERATOR"; break;
            case Token::DELIMITER: cout << "DELIMITER"; break;
            case Token::UNKNOWN: cout << "UNKNOWN"; break;
        }
        cout << ", Value: " << token.value << endl;
    }
}

// Driver: read source until a line containing only `end`, then tokenize it
int main() {
    string sourceCode;
    cout << "Enter C++ code (type `end` to finish):\n";
    while (true) {
        string line;
        if (!getline(cin, line) || line == "end") break; // stop on EOF or sentinel
        sourceCode += line + "\n";
    }

    vector<Token> tokens = lexicalAnalyzer(sourceCode);
    printTokens(tokens);

    return 0;
}

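To build it locally, any C++11-capable compiler should do; for instance (the file name lexer.cpp is just a placeholder): g++ -std=c++11 -o lexer lexer.cpp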
Success #stdin #stdout 0.01s 5288KB
stdin
double pi = 3.14;
char c = 'A';
string greeting = "Hello, World!";
end
stdout
Enter C++ code (type `end` to finish):
Token Type: KEYWORD, Value: double
Token Type: IDENTIFIER, Value: pi
Token Type: OPERATOR, Value: =
Token Type: FLOAT, Value: 3.14
Token Type: DELIMITER, Value: ;
Token Type: KEYWORD, Value: char
Token Type: IDENTIFIER, Value: c
Token Type: OPERATOR, Value: =
Token Type: CHAR_LITERAL, Value: 'A'
Token Type: DELIMITER, Value: ;
Token Type: KEYWORD, Value: string
Token Type: IDENTIFIER, Value: greeting
Token Type: OPERATOR, Value: =
Token Type: STRING_LITERAL, Value: "Hello, World!"
Token Type: DELIMITER, Value: ;