# Lookup tables for the lexer of a small C-like language.

# Reserved words; any other word made of letters, digits and "_" is an identifier.
keywords = {"int", "float", "char", "if", "else", "while", "for", "return"}

# Arithmetic, assignment and comparison operators (one or two characters long).
operators = {"+", "-", "*", "/", "=", "==", "<", ">", "<=", ">=", "!="}

# Single-character separators and brackets.
punctuations = {";", ",", "(", ")", "{", "}"}

# Running total of tokens seen per category; every category starts at zero.
counts = dict.fromkeys(
    ("KEYWORD", "IDENTIFIER", "LITERAL", "OPERATOR", "PUNCTUATION"),
    0,
)
def is_identifier(s):
    """Return True if *s* is a valid identifier, False otherwise.

    A valid identifier starts with a letter or an underscore and consists
    only of letters, digits and underscores.

    An empty string is reported as not being an identifier instead of
    raising IndexError when its first character is inspected.
    """
    if not s:
        return False
    # The first character may not be a digit.
    if not (s[0].isalpha() or s[0] == "_"):
        return False
    return all(c.isalnum() or c == "_" for c in s)
def tokenize(line, line_no):
    """Split one line of source text into tokens.

    Scans *line* left to right and classifies each lexeme as KEYWORD,
    IDENTIFIER, LITERAL, OPERATOR or PUNCTUATION. Whitespace is skipped.
    A run of letters/digits that starts with a digit but is not purely
    digits, and any character that fits no category, is reported as a
    lexical error tagged with *line_no*.

    Side effect: every emitted token increments the matching entry of the
    module-level ``counts`` dictionary.

    Returns a pair ``(tokens, errors)`` where ``tokens`` is a list of
    ``(category, lexeme)`` tuples and ``errors`` is a list of messages.
    """
    found = []
    problems = []
    length = len(line)

    def emit(category, lexeme):
        # Record the token and bump the global per-category counter.
        found.append((category, lexeme))
        counts[category] += 1

    def report(prefix, lexeme):
        problems.append(prefix + lexeme + "' at line " + str(line_no))

    pos = 0
    while pos < length:
        ch = line[pos]

        if ch.isspace():
            pos += 1
            continue

        if ch.isalpha() or ch == "_":
            # Word: letters, digits and underscores up to the next other char.
            start = pos
            while pos < length and (line[pos].isalnum() or line[pos] == "_"):
                pos += 1
            word = line[start:pos]
            if word in keywords:
                emit("KEYWORD", word)
            elif is_identifier(word):
                emit("IDENTIFIER", word)
            continue

        if ch.isdigit():
            # Consume trailing letters too, so "12ab" is reported as one
            # malformed lexeme rather than a literal followed by a word.
            start = pos
            while pos < length and line[pos].isalnum():
                pos += 1
            number = line[start:pos]
            if number.isdigit():
                emit("LITERAL", number)
            else:
                report("LEXICAL ERROR: Invalid identifier '", number)
            continue

        # Two-character operators are tried before one-character ones so
        # that "==" is not split into two "=" tokens.
        pair = line[pos:pos + 2]
        if len(pair) == 2 and pair in operators:
            emit("OPERATOR", pair)
            pos += 2
        elif ch in operators:
            emit("OPERATOR", ch)
            pos += 1
        elif ch in punctuations:
            emit("PUNCTUATION", ch)
            pos += 1
        else:
            report("LEXICAL ERROR: Invalid symbol '", ch)
            pos += 1

    return found, problems
# Interactive driver: read a program from standard input until end-of-file,
# print the tokens and lexical errors of every line, then the overall totals.

# raw_input only exists on Python 2; on Python 3 the same builtin is called
# input. Pick whichever is available so the read loop does not die with a
# NameError (which the EOFError handler below would not catch).
try:
    _read_line = raw_input
except NameError:
    _read_line = input

print("Enter program (Ctrl+D on Linux/macOS, Ctrl+Z then Enter on Windows):")

lines = []
try:
    while True:
        lines.append(_read_line())
except EOFError:
    # End-of-file is the expected way to finish entering the program.
    pass

# Line numbers are 1-based in the report and in error messages.
for line_no, line in enumerate(lines, 1):
    print("\n Line " + str(line_no) + " : " + line)
    tokens, errors = tokenize(line, line_no)

    print("Token Tree")
    for t in tokens:
        print(" └── " + t[0] + " : " + t[1])

    for e in errors:
        print(e)

print("\n FINAL COUNT")
for k in counts:
    print(k + " : " + str(counts[k]))
a2V5d29yZHMgPSBzZXQoWyJpbnQiLCAiZmxvYXQiLCAiY2hhciIsICJpZiIsICJlbHNlIiwgIndoaWxlIiwgImZvciIsICJyZXR1cm4iXSkKb3BlcmF0b3JzID0gc2V0KFsiKyIsICItIiwgIioiLCAiLyIsICI9IiwgIj09IiwgIjwiLCAiPiIsICI8PSIsICI+PSIsICIhPSJdKQpwdW5jdHVhdGlvbnMgPSBzZXQoWyI7IiwgIiwiLCAiKCIsICIpIiwgInsiLCAifSJdKQoKY291bnRzID0gewogICAgIktFWVdPUkQiOiAwLAogICAgIklERU5USUZJRVIiOiAwLAogICAgIkxJVEVSQUwiOiAwLAogICAgIk9QRVJBVE9SIjogMCwKICAgICJQVU5DVFVBVElPTiI6IDAKfQoKZGVmIGlzX2lkZW50aWZpZXIocyk6CiAgICBpZiBub3QgKHNbMF0uaXNhbHBoYSgpIG9yIHNbMF0gPT0gIl8iKToKICAgICAgICByZXR1cm4gRmFsc2UKICAgIGZvciBjIGluIHM6CiAgICAgICAgaWYgbm90IChjLmlzYWxudW0oKSBvciBjID09ICJfIik6CiAgICAgICAgICAgIHJldHVybiBGYWxzZQogICAgcmV0dXJuIFRydWUKCmRlZiB0b2tlbml6ZShsaW5lLCBsaW5lX25vKToKICAgIHRva2VucyA9IFtdCiAgICBlcnJvcnMgPSBbXQogICAgaSA9IDAKCiAgICB3aGlsZSBpIDwgbGVuKGxpbmUpOgogICAgICAgIGlmIGxpbmVbaV0uaXNzcGFjZSgpOgogICAgICAgICAgICBpICs9IDEKICAgICAgICAgICAgY29udGludWUKCiAgICAgICAgaWYgbGluZVtpXS5pc2FscGhhKCkgb3IgbGluZVtpXSA9PSAiXyI6CiAgICAgICAgICAgIHRlbXAgPSAiIgogICAgICAgICAgICB3aGlsZSBpIDwgbGVuKGxpbmUpIGFuZCAobGluZVtpXS5pc2FsbnVtKCkgb3IgbGluZVtpXSA9PSAiXyIpOgogICAgICAgICAgICAgICAgdGVtcCArPSBsaW5lW2ldCiAgICAgICAgICAgICAgICBpICs9IDEKICAgICAgICAgICAgaWYgdGVtcCBpbiBrZXl3b3JkczoKICAgICAgICAgICAgICAgIHRva2Vucy5hcHBlbmQoKCJLRVlXT1JEIiwgdGVtcCkpCiAgICAgICAgICAgICAgICBjb3VudHNbIktFWVdPUkQiXSArPSAxCiAgICAgICAgICAgIGVsaWYgaXNfaWRlbnRpZmllcih0ZW1wKToKICAgICAgICAgICAgICAgIHRva2Vucy5hcHBlbmQoKCJJREVOVElGSUVSIiwgdGVtcCkpCiAgICAgICAgICAgICAgICBjb3VudHNbIklERU5USUZJRVIiXSArPSAxCgogICAgICAgIGVsaWYgbGluZVtpXS5pc2RpZ2l0KCk6CiAgICAgICAgICAgIHRlbXAgPSAiIgogICAgICAgICAgICB3aGlsZSBpIDwgbGVuKGxpbmUpIGFuZCBsaW5lW2ldLmlzYWxudW0oKToKICAgICAgICAgICAgICAgIHRlbXAgKz0gbGluZVtpXQogICAgICAgICAgICAgICAgaSArPSAxCiAgICAgICAgICAgIGlmIHRlbXAuaXNkaWdpdCgpOgogICAgICAgICAgICAgICAgdG9rZW5zLmFwcGVuZCgoIkxJVEVSQUwiLCB0ZW1wKSkKICAgICAgICAgICAgICAgIGNvdW50c1siTElURVJBTCJdICs9IDEKICAgICAgICAgICAgZWxzZToKICAgICAgICAgICAgICAgIGVycm9ycy5hcHBlbmQoCiAgICAgICAgICAgICAgICAgICAgIkxFWElDQUwgRVJST1I6IElu
dmFsaWQgaWRlbnRpZmllciAnIiArIHRlbXAgKwogICAgICAgICAgICAgICAgICAgICInIGF0IGxpbmUgIiArIHN0cihsaW5lX25vKQogICAgICAgICAgICAgICAgKQoKICAgICAgICBlbGlmIGkgKyAxIDwgbGVuKGxpbmUpIGFuZCBsaW5lW2k6aSsyXSBpbiBvcGVyYXRvcnM6CiAgICAgICAgICAgIHRva2Vucy5hcHBlbmQoKCJPUEVSQVRPUiIsIGxpbmVbaTppKzJdKSkKICAgICAgICAgICAgY291bnRzWyJPUEVSQVRPUiJdICs9IDEKICAgICAgICAgICAgaSArPSAyCgogICAgICAgIGVsaWYgbGluZVtpXSBpbiBvcGVyYXRvcnM6CiAgICAgICAgICAgIHRva2Vucy5hcHBlbmQoKCJPUEVSQVRPUiIsIGxpbmVbaV0pKQogICAgICAgICAgICBjb3VudHNbIk9QRVJBVE9SIl0gKz0gMQogICAgICAgICAgICBpICs9IDEKCiAgICAgICAgZWxpZiBsaW5lW2ldIGluIHB1bmN0dWF0aW9uczoKICAgICAgICAgICAgdG9rZW5zLmFwcGVuZCgoIlBVTkNUVUFUSU9OIiwgbGluZVtpXSkpCiAgICAgICAgICAgIGNvdW50c1siUFVOQ1RVQVRJT04iXSArPSAxCiAgICAgICAgICAgIGkgKz0gMQoKICAgICAgICBlbHNlOgogICAgICAgICAgICBlcnJvcnMuYXBwZW5kKAogICAgICAgICAgICAgICAgIkxFWElDQUwgRVJST1I6IEludmFsaWQgc3ltYm9sICciICsgbGluZVtpXSArCiAgICAgICAgICAgICAgICAiJyBhdCBsaW5lICIgKyBzdHIobGluZV9ubykKICAgICAgICAgICAgKQogICAgICAgICAgICBpICs9IDEKCiAgICByZXR1cm4gdG9rZW5zLCBlcnJvcnMKCnByaW50KCJFbnRlciBwcm9ncmFtIChDdHJsK0Qgb24gTGludXgvbWFjT1MsIEN0cmwrWiB0aGVuIEVudGVyIG9uIFdpbmRvd3MpOiIpCgpsaW5lcyA9IFtdCnRyeToKICAgIHdoaWxlIFRydWU6CiAgICAgICAgbGluZXMuYXBwZW5kKHJhd19pbnB1dCgpKQpleGNlcHQgRU9GRXJyb3I6CiAgICBwYXNzCgpsaW5lX25vID0gMQpmb3IgbGluZSBpbiBsaW5lczoKICAgIHByaW50KCJcbkxpbmUgIiArIHN0cihsaW5lX25vKSArICIgOiAiICsgbGluZSkKICAgIHRva2VucywgZXJyb3JzID0gdG9rZW5pemUobGluZSwgbGluZV9ubykKCiAgICBwcmludCgiVG9rZW4gVHJlZSIpCiAgICBmb3IgdCBpbiB0b2tlbnM6CiAgICAgICAgcHJpbnQoIiDilJTilIDilIAgIiArIHRbMF0gKyAiIDogIiArIHRbMV0pCgogICAgZm9yIGUgaW4gZXJyb3JzOgogICAgICAgIHByaW50KGUpCgogICAgbGluZV9ubyArPSAxCgpwcmludCgiXG5GSU5BTCBDT1VOVCIpCmZvciBrIGluIGNvdW50czoKICAgIHByaW50KGsgKyAiIDogIiArIHN0cihjb3VudHNba10pKQo=