文章详情

短信预约-IT技能 免费直播动态提醒

请输入下面的图形验证码

提交验证

短信预约提醒成功

用 Python 实现词法分析器(Lexical Analyzer)

2023-06-02 01:01

关注

  from __future__ import print_function

  import sys

  # Token type codes. The two sequences below must remain in the same order:
  # a token code is used directly as an index into all_syms when printing.
  # The unpacking is parenthesized so the statement does not depend on
  # backslash line continuations.
  (
      tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not,
      tk_Lss, tk_Leq, tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And,
      tk_Or, tk_If, tk_Else, tk_While, tk_Print, tk_Putc, tk_Lparen,
      tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident,
      tk_Integer, tk_String,
  ) = range(31)

  # Printable name of every token code, indexed by the code itself.
  all_syms = [
      "End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add",
      "Op_subtract", "Op_negate", "Op_not", "Op_less", "Op_lessequal",
      "Op_greater", "Op_greaterequal", "Op_equal", "Op_notequal",
      "Op_assign", "Op_and", "Op_or", "Keyword_if", "Keyword_else",
      "Keyword_while", "Keyword_print", "Keyword_putc", "LeftParen",
      "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma",
      "Identifier", "Integer", "String",
  ]

  # Single-character symbols that map to a token with no lookahead.
  symbols = {
      '{': tk_Lbrace, '}': tk_Rbrace, '(': tk_Lparen, ')': tk_Rparen,
      '+': tk_Add, '-': tk_Sub, '*': tk_Mul, '%': tk_Mod,
      ';': tk_Semi, ',': tk_Comma,
  }

  # Reserved words; any other identifier-shaped text is a tk_Ident.
  key_words = {
      'if': tk_If, 'else': tk_Else, 'print': tk_Print,
      'putc': tk_Putc, 'while': tk_While,
  }

  # Scanner state shared by all the functions below.
  the_ch = " "        # dummy first char - but it must be a space
  the_col = 0         # column of the_ch within the current line
  the_line = 1        # line number of the_ch
  input_file = None   # set by the main driver before scanning starts

  #*** show error and exit

  def error(line, col, msg):
      """Report a lexing error and terminate the program.

      Prints ``line``, ``col`` and ``msg`` on one line to standard output,
      then exits with status 1. This function never returns.
      """
      print(line, col, msg)
      # sys.exit() instead of the bare exit() helper: exit() is injected by
      # the site module and is missing under ``python -S`` or in frozen apps.
      sys.exit(1)

  #*** get the next character from the input

  def next_ch():
      """Read the next character from ``input_file`` into ``the_ch``.

      Updates the position globals as a side effect: ``the_col`` is
      incremented for every read (including a read that hits end of input),
      and a newline bumps ``the_line`` and resets ``the_col`` to 0.

      Returns the character read; the rest of the scanner treats an empty
      string as end of input.
      """
      global the_ch, the_col, the_line

      the_ch = input_file.read(1)
      the_col += 1
      if the_ch == '\n':
          the_line += 1
          the_col = 0
      return the_ch

  #*** 'x' - character constants

  def char_lit(err_line, err_col):
      """Scan a character constant such as 'x', '\\n' or '\\\\'.

      Called with ``the_ch`` on the opening quote. On success the scanner is
      left on the character after the closing quote.

      Returns ``(tk_Integer, err_line, err_col, n)`` where ``n`` is the code
      point of the character. Calls ``error`` (which exits) for an empty
      constant, an unknown escape, a constant longer than one character, or
      end of input inside the constant.
      """
      next_ch()  # skip opening quote
      if not the_ch:
          # ord('') would raise TypeError; report the truncated input instead.
          error(err_line, err_col, "EOF in character constant")
      n = ord(the_ch)

      if the_ch == '\'':
          error(err_line, err_col, "empty character constant")
      elif the_ch == '\\':
          next_ch()
          if the_ch == 'n':
              n = 10
          elif the_ch == '\\':
              n = ord('\\')
          elif not the_ch:
              # '%c' cannot format an empty string, so handle EOF separately.
              error(err_line, err_col, "EOF in character constant")
          else:
              error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))

      if next_ch() != '\'':
          error(err_line, err_col, "multi-character constant")
      next_ch()  # step past the closing quote
      return tk_Integer, err_line, err_col, n

  #*** process divide or comments

  def div_or_cmt(err_line, err_col):
      """Scan either a divide operator or a /* ... */ comment.

      Called with ``the_ch`` on '/'. If the next character is not '*', the
      result is ``(tk_Div, err_line, err_col)``. Otherwise the comment is
      skipped and the token that follows it is returned via ``gettok()``.
      Calls ``error`` (which exits) if input ends inside the comment.
      """
      if next_ch() != '*':
          return tk_Div, err_line, err_col

      # comment found
      next_ch()
      while True:
          if the_ch == '*':
              # Do not advance in an else here: a run such as '**/' must
              # re-test the second '*' on the next iteration.
              if next_ch() == '/':
                  next_ch()
                  return gettok()
          elif not the_ch:
              error(err_line, err_col, "EOF in comment")
          else:
              next_ch()

  #*** "string"

  def string_lit(start, err_line, err_col):
      """Scan a string literal delimited by the ``start`` character.

      Called with ``the_ch`` on the opening delimiter. Characters are
      collected until the same delimiter is seen again; the scanner is left
      on the character after the closing delimiter.

      Returns ``(tk_String, err_line, err_col, text)``. Calls ``error``
      (which exits) if end of input or a newline is reached first.
      """
      chars = []
      while next_ch() != start:
          if not the_ch:
              error(err_line, err_col, "EOF while scanning string literal")
          if the_ch == '\n':
              error(err_line, err_col, "EOL while scanning string literal")
          chars.append(the_ch)

      # Skip the closing delimiter. This must happen once, after the loop;
      # inside the loop it would drop every second character.
      next_ch()
      return tk_String, err_line, err_col, "".join(chars)

  #*** handle identifiers and integers

  def ident_or_int(err_line, err_col):
      """Scan an identifier, keyword or integer literal starting at ``the_ch``.

      Consumes the longest run of alphanumeric characters and underscores.

      Returns one of:
        * ``(tk_Integer, err_line, err_col, n)`` for an all-digit run,
        * ``(keyword_code, err_line, err_col)`` for a reserved word,
        * ``(tk_Ident, err_line, err_col, text)`` otherwise.

      Calls ``error`` (which exits) if no character could be consumed or if
      the text starts with a digit but is not a valid number.
      """
      is_number = True
      chars = []
      while the_ch.isalnum() or the_ch == '_':
          chars.append(the_ch)
          if not the_ch.isdigit():
              is_number = False
          next_ch()
      text = "".join(chars)

      if not text:
          error(err_line, err_col, "ident_or_int: unrecognized character: (%d) '%c'" % (ord(the_ch), the_ch))

      if text[0].isdigit():
          if not is_number:
              error(err_line, err_col, "invalid number: %s" % (text))
          try:
              n = int(text)
          except ValueError:
              # str.isdigit() accepts characters such as superscript digits
              # that int() rejects; report them instead of crashing.
              error(err_line, err_col, "invalid number: %s" % (text))
          return tk_Integer, err_line, err_col, n

      if text in key_words:
          return key_words[text], err_line, err_col

      return tk_Ident, err_line, err_col, text

  #*** look ahead for '>=', etc.

  def follow(expect, ifyes, ifno, err_line, err_col):
      """Resolve a one- or two-character operator using one char of lookahead.

      Called with ``the_ch`` on the operator's first character. If the next
      character equals ``expect``, it is consumed and
      ``(ifyes, err_line, err_col)`` is returned. Otherwise
      ``(ifno, err_line, err_col)`` is returned.

      Passing ``tk_EOI`` as ``ifno`` marks the single-character form as
      invalid (used for '&' and '|'); in that case ``error`` is called,
      which exits.
      """
      if next_ch() == expect:
          next_ch()
          return ifyes, err_line, err_col

      if ifno == tk_EOI:
          if not the_ch:
              # ord('') would raise TypeError at end of input.
              error(err_line, err_col, "follow: unexpected end of input")
          else:
              error(err_line, err_col, "follow: unrecognized character: (%d) '%c'" % (ord(the_ch), the_ch))

      return ifno, err_line, err_col

  #*** return the next token type

  def gettok():
      """Return the next token from the input.

      Skips leading whitespace, records the line and column of the first
      significant character, then dispatches on that character.

      The result is a tuple ``(token_code, line, col)``; integer, identifier
      and string tokens carry their value as a fourth element.
      """
      while the_ch.isspace():
          next_ch()

      # Position of the token's first character, used for error reports too.
      err_line = the_line
      err_col = the_col

      if not the_ch:
          return tk_EOI, err_line, err_col
      elif the_ch == '/':
          return div_or_cmt(err_line, err_col)
      elif the_ch == '\'':
          return char_lit(err_line, err_col)
      elif the_ch == '<':
          return follow('=', tk_Leq, tk_Lss, err_line, err_col)
      elif the_ch == '>':
          return follow('=', tk_Geq, tk_Gtr, err_line, err_col)
      elif the_ch == '=':
          return follow('=', tk_Eq, tk_Assign, err_line, err_col)
      elif the_ch == '!':
          return follow('=', tk_Neq, tk_Not, err_line, err_col)
      elif the_ch == '&':
          # tk_EOI as the fallback tells follow() that a lone '&' is an error.
          return follow('&', tk_And, tk_EOI, err_line, err_col)
      elif the_ch == '|':
          return follow('|', tk_Or, tk_EOI, err_line, err_col)
      elif the_ch == '"':
          return string_lit(the_ch, err_line, err_col)
      elif the_ch in symbols:
          sym = symbols[the_ch]
          next_ch()
          return sym, err_line, err_col
      else:
          return ident_or_int(err_line, err_col)

  #*** main driver

  # Read from the file named by the first command-line argument if there is
  # one, otherwise from standard input.
  input_file = sys.stdin
  if len(sys.argv) > 1:
      try:
          input_file = open(sys.argv[1], "r", 4096)
      except IOError:
          error(0, 0, "Can't open %s" % sys.argv[1])

  try:
      # Print one line per token until End_of_input has been printed.
      while True:
          t = gettok()
          tok = t[0]
          line = t[1]
          col = t[2]

          print("%5d %5d %-14s" % (line, col, all_syms[tok]), end='')

          # Only these three token kinds carry a value in t[3].
          if tok == tk_Integer:
              print(" %5d" % (t[3]))
          elif tok == tk_Ident:
              print(" %s" % (t[3]))
          elif tok == tk_String:
              print(' "%s"' % (t[3]))
          else:
              print("")

          if tok == tk_EOI:
              break
  finally:
      # Close a file we opened ourselves, also when error() exits mid-scan;
      # never close the process's stdin.
      if input_file is not sys.stdin:
          input_file.close()

  输出(测试用例三)

  5 16 Keyword_print

  5 40 Op_subtract

  6 16 Keyword_putc

  6 40 Op_less

  7 16 Keyword_if

  7 40 Op_greater

  8 16 Keyword_else

  8 40 Op_lessequal

  9 16 Keyword_while

  9 40 Op_greaterequal

  10 16 LeftBrace

  10 40 Op_equal

  11 16 RightBrace

  11 40 Op_notequal

  12 16 LeftParen

  12 40 Op_and

  13 16 RightParen

  13 40 Op_or

  14 16 Op_subtract

  14 40 Semicolon

  15 16 Op_not

  15 40 Comma

  16 16 Op_multiply

  16 40 Op_assign

  17 16 Op_divide

  17 40 Integer 42

  18 16 Op_mod

  18 40 String "String literal"

  19 16 Op_add

  19 40 Identifier variable_name

  20 26 Integer 10

  21 26 Integer 92

  22 26 Integer 32

  23 1 End_of_input

阅读原文内容投诉

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

软考中级精品资料免费领

  • 历年真题答案解析
  • 备考技巧名师总结
  • 高频考点精准押题
  • 2024年上半年信息系统项目管理师第二批次真题及答案解析(完整版)

    难度     813人已做
    查看
  • 【考后总结】2024年5月26日信息系统项目管理师第2批次考情分析

    难度     354人已做
    查看
  • 【考后总结】2024年5月25日信息系统项目管理师第1批次考情分析

    难度     318人已做
    查看
  • 2024年上半年软考高项第一、二批次真题考点汇总(完整版)

    难度     435人已做
    查看
  • 2024年上半年系统架构设计师考试综合知识真题

    难度     224人已做
    查看

相关文章

发现更多好内容

猜你喜欢

AI推送时光机
位置:首页-资讯-后端开发
咦!没有更多了?去看看编程学习网的其它内容吧
首页课程
资料下载
问答资讯