fork download
  1. keywords = set(["int", "float", "char", "if", "else", "while", "for", "return"])
  2. operators = set(["+", "-", "*", "/", "=", "==", "<", ">", "<=", ">=", "!="])
  3. punctuations = set([";", ",", "(", ")", "{", "}"])
  4.  
  5. counts = {
  6. "KEYWORD": 0,
  7. "IDENTIFIER": 0,
  8. "LITERAL": 0,
  9. "OPERATOR": 0,
  10. "PUNCTUATION": 0
  11. }
  12.  
  13. def is_identifier(s):
  14. if not (s[0].isalpha() or s[0] == "_"):
  15. return False
  16. for c in s:
  17. if not (c.isalnum() or c == "_"):
  18. return False
  19. return True
  20.  
  21. def tokenize(line, line_no):
  22. tokens = []
  23. errors = []
  24. i = 0
  25.  
  26. while i < len(line):
  27. if line[i].isspace():
  28. i += 1
  29. continue
  30.  
  31. if line[i].isalpha() or line[i] == "_":
  32. temp = ""
  33. while i < len(line) and (line[i].isalnum() or line[i] == "_"):
  34. temp += line[i]
  35. i += 1
  36. if temp in keywords:
  37. tokens.append(("KEYWORD", temp))
  38. counts["KEYWORD"] += 1
  39. elif is_identifier(temp):
  40. tokens.append(("IDENTIFIER", temp))
  41. counts["IDENTIFIER"] += 1
  42.  
  43. elif line[i].isdigit():
  44. temp = ""
  45. while i < len(line) and line[i].isalnum():
  46. temp += line[i]
  47. i += 1
  48. if temp.isdigit():
  49. tokens.append(("LITERAL", temp))
  50. counts["LITERAL"] += 1
  51. else:
  52. errors.append(
  53. "LEXICAL ERROR: Invalid identifier '" + temp +
  54. "' at line " + str(line_no)
  55. )
  56.  
  57. elif i + 1 < len(line) and line[i:i+2] in operators:
  58. tokens.append(("OPERATOR", line[i:i+2]))
  59. counts["OPERATOR"] += 1
  60. i += 2
  61.  
  62. elif line[i] in operators:
  63. tokens.append(("OPERATOR", line[i]))
  64. counts["OPERATOR"] += 1
  65. i += 1
  66.  
  67. elif line[i] in punctuations:
  68. tokens.append(("PUNCTUATION", line[i]))
  69. counts["PUNCTUATION"] += 1
  70. i += 1
  71.  
  72. else:
  73. errors.append(
  74. "LEXICAL ERROR: Invalid symbol '" + line[i] +
  75. "' at line " + str(line_no)
  76. )
  77. i += 1
  78.  
  79. return tokens, errors
  80.  
  81. print("Enter program (Ctrl+D on Linux/macOS, Ctrl+Z then Enter on Windows):")
  82.  
  83. lines = []
  84. try:
  85. while True:
  86. lines.append(raw_input())
  87. except EOFError:
  88. pass
  89.  
  90. line_no = 1
  91. for line in lines:
  92. print("\nLine " + str(line_no) + " : " + line)
  93. tokens, errors = tokenize(line, line_no)
  94.  
  95. print("Token Tree")
  96. for t in tokens:
  97. print(" └── " + t[0] + " : " + t[1])
  98.  
  99. for e in errors:
  100. print(e)
  101.  
  102. line_no += 1
  103.  
  104. print("\nFINAL COUNT")
  105. for k in counts:
  106. print(k + " : " + str(counts[k]))
  107.  
Success #stdin #stdout 0.02s 7164KB
stdin
int a = 10;
int x = 5;
float b = a + 20;
stdout
Enter program (Ctrl+D on Linux/macOS, Ctrl+Z then Enter on Windows):

Line 1 : int a = 10;
Token Tree
 └── KEYWORD : int
 └── IDENTIFIER : a
 └── OPERATOR : =
 └── LITERAL : 10
 └── PUNCTUATION : ;

Line 2 : int x = 5;
Token Tree
 └── KEYWORD : int
 └── IDENTIFIER : x
 └── OPERATOR : =
 └── LITERAL : 5
 └── PUNCTUATION : ;

Line 3 : float b = a + 20;
Token Tree
 └── KEYWORD : float
 └── IDENTIFIER : b
 └── OPERATOR : =
 └── IDENTIFIER : a
 └── OPERATOR : +
 └── LITERAL : 20
 └── PUNCTUATION : ;

FINAL COUNT
OPERATOR : 4
LITERAL : 3
IDENTIFIER : 4
KEYWORD : 3
PUNCTUATION : 3