【2025-11-16】现有代码上传

This commit is contained in:
hym816
2025-11-16 21:04:02 +08:00
parent 6e114d8735
commit d71eb4576e
28 changed files with 11524 additions and 0 deletions

204
AssemblyGenerator.cpp Normal file
View File

@@ -0,0 +1,204 @@
// AssemblyGenerator.cpp
#include "AssemblyGenerator.h"
#include <sstream>
#include <iostream>
#include <cctype>
#include <cassert>
// Binds the generator to the caller's quad list (held by reference) and
// lays out the stack frame up front so later emission can use fixed offsets.
AssemblyGenerator::AssemblyGenerator(const std::vector<Quad> &quads)
    : quads_(quads),
      stackSize_(0)
{
    allocateStack();
}
// Assigns every source variable and every temporary an 8-byte slot below the
// frame pointer and computes the total frame size.
//
// Slots are addressed as [x29, #-offset], so the first slot has offset 8.
// A name that already owns a local slot is not given a second temporary slot
// (loadOperand/storeResult always consult the local table first, so such a
// slot would never be used), and the label operands of control-flow quads
// are never treated as temporaries.
void AssemblyGenerator::allocateStack() {
    int offset = 0;

    // 1) Source variables: every distinct non-empty `var` field.
    for (const auto &q : quads_) {
        if (q.var.empty() || localOffset_.count(q.var) != 0) {
            continue;
        }
        offset += 8;
        localOffset_[q.var] = offset;
    }

    // 2) Temporaries: result names starting with 't' (t1, t2, ...).
    for (const auto &q : quads_) {
        // For these ops `result` holds a label name, not a value.
        if (q.op == "label" || q.op == "goto" || q.op == "ifFalse") {
            continue;
        }
        if (q.result.empty() || q.result[0] != 't') {
            continue;
        }
        if (localOffset_.count(q.result) != 0 || tempOffset_.count(q.result) != 0) {
            continue;
        }
        offset += 8;
        tempOffset_[q.result] = offset;
    }

    // Round the frame size up to a multiple of 16 bytes.
    stackSize_ = ((offset + 15) / 16) * 16;
}
// Produces the complete assembly text for the quad list.
//
// Layout: section/global directives, an optional "_main:" label, the
// prologue, one chunk per quad, then the epilogue.
// NOTE(review): "_main:" is written only when the very first quad is
// `label main`; otherwise `.globl _main` is declared without a definition —
// confirm whether that case can occur.
std::string AssemblyGenerator::generate() {
    std::ostringstream asmText;

    // Section and exported symbol.
    asmText << ".text\n";
    asmText << ".globl _main\n";

    // A leading `label main` quad is consumed here rather than by emitInstruction.
    const bool startsWithMain = !quads_.empty()
                             && quads_[0].op == "label"
                             && quads_[0].result == "main";
    if (startsWithMain) {
        asmText << "_main:\n";
    }
    emitPrologue(asmText);

    // Remaining quads, in order.
    const size_t firstBodyQuad = startsWithMain ? 1 : 0;
    for (size_t pos = firstBodyQuad; pos < quads_.size(); ++pos) {
        emitInstruction(quads_[pos], asmText);
    }

    emitEpilogue(asmText);
    return asmText.str();
}
// Writes the function prologue: saves FP/LR, establishes the new frame
// pointer, and reserves stackSize_ bytes when the frame is non-empty.
void AssemblyGenerator::emitPrologue(std::ostream &os) const {
    os << "\tstp x29, x30, [sp, #-16]! // push FP, LR\n"
       << "\tmov x29, sp\n";
    if (stackSize_ <= 0) {
        return;
    }
    os << "\tsub sp, sp, #" << stackSize_ << "\n";
}
// Writes the function epilogue: releases the local frame (if any), restores
// FP/LR, and returns to the caller.
void AssemblyGenerator::emitEpilogue(std::ostream &os) const {
    const bool hasFrame = stackSize_ > 0;
    if (hasFrame) {
        os << "\tadd sp, sp, #" << stackSize_ << "\n";
    }
    os << "\tldp x29, x30, [sp], #16 // pop FP, LR\n"
       << "\tret\n";
}
// Emits code that loads the operand `name` into w9.
//
// Resolution order: integer literal (optionally with a leading '-'), local
// variable slot, temporary slot, and finally a global symbol addressed via
// x10.
// NOTE(review): the global path uses the `:lo12:` relocation syntax while the
// rest of the generator emits a Mach-O style `_main`; confirm which assembler
// is targeted.
void AssemblyGenerator::loadOperand(const std::string &name, std::ostream &os) const {
    // std::isdigit requires a value representable as unsigned char.
    const auto isDigitAt = [&name](std::size_t pos) {
        return pos < name.size()
            && std::isdigit(static_cast<unsigned char>(name[pos])) != 0;
    };
    const bool isLiteral = isDigitAt(0) || (!name.empty() && name[0] == '-' && isDigitAt(1));

    if (isLiteral) {
        // Immediate value
        os << "\tmov w9, #" << name << "\n";
    }
    else if (localOffset_.count(name)) {
        // Local variable
        os << "\tldr w9, [x29, #-" << localOffset_.at(name) << "]\n";
    }
    else if (tempOffset_.count(name)) {
        // Temporary
        os << "\tldr w9, [x29, #-" << tempOffset_.at(name) << "]\n";
    }
    else {
        // Global variable
        os << "\tadrp x10, " << name << "\n";
        os << "\tadd x10, x10, :lo12:" << name << "\n";
        os << "\tldr w9, [x10]\n";
    }
}
// Emits code that stores w9 into the destination `name`.
//
// The destination is looked up as a local variable first, then as a
// temporary; anything else is written through x10 as a global symbol.
void AssemblyGenerator::storeResult(const std::string &name, std::ostream &os) const {
    const auto localSlot = localOffset_.find(name);
    if (localSlot != localOffset_.end()) {
        // Local variable
        os << "\tstr w9, [x29, #-" << localSlot->second << "]\n";
        return;
    }

    const auto tempSlot = tempOffset_.find(name);
    if (tempSlot != tempOffset_.end()) {
        // Temporary
        os << "\tstr w9, [x29, #-" << tempSlot->second << "]\n";
        return;
    }

    // Global variable
    os << "\tadrp x10, " << name << "\n"
       << "\tadd x10, x10, :lo12:" << name << "\n"
       << "\tstr w9, [x10]\n";
}
void AssemblyGenerator::emitInstruction(const Quad &q, std::ostream &os) const {
using std::string;
// 标签
if (q.op == "label") {
// 非 main 的其他标签直接原样输出
if (q.result != "main") {
os << q.result << ":\n";
}
return;
}
// 无条件跳转
if (q.op == "goto") {
os << "\tb " << q.result << "\n";
return;
}
// ifFalse x -> cmp x, #0; beq L
if (q.op == "ifFalse") {
loadOperand(q.arg1, os);
os << "\tcmp w9, #0\n";
os << "\tbeq " << q.result << "\n";
return;
}
// 赋值 =
if (q.op == "=") {
loadOperand(q.arg1, os);
storeResult(q.result, os);
return;
}
// 二元算术/逻辑运算
static const std::unordered_map<string, string> binOpMap = {
{ "+", "add" }, { "-", "sub" }, { "*", "mul" },
{ "/", "sdiv" }, { "<<", "lsl" }, { ">>", "lsr" },
{ "&", "and" }, { "|", "orr" }, { "^", "eor" }
};
if (binOpMap.count(q.op)) {
loadOperand(q.arg1, os);
os << "\tmov w10, w9\n";
loadOperand(q.arg2, os);
os << "\tmov w11, w9\n";
os << "\t" << binOpMap.at(q.op) << " w9, w10, w11\n";
storeResult(q.result, os);
return;
}
// 比较运算,使用 cset 生成布尔 0/1
static const std::unordered_map<string, string> cmpCondMap = {
{"==", "eq"}, {"!=", "ne"},
{"<", "lt"}, {">", "gt"},
{"<=", "le"}, {">=", "ge"}
};
if (cmpCondMap.count(q.op)) {
loadOperand(q.arg1, os);
os << "\tmov w10, w9\n";
loadOperand(q.arg2, os);
os << "\tmov w11, w9\n";
os << "\tcmp w10, w11\n";
os << "\tcset w9, " << cmpCondMap.at(q.op) << "\n";
storeResult(q.result, os);
return;
}
// call / param / return
if (q.op == "param") {
return;
}
if (q.op == "call") {
os << "\tbl " << q.arg1 << "\n";
os << "\tmov w9, w0\n";
storeResult(q.result, os);
return;
}
if (q.op == "return") {
if (!q.arg1.empty()) {
loadOperand(q.arg1, os);
os << "\tmov w0, w9\n";
}
return;
}
std::cerr << "Unhandled IR op: " << q.op << "\n";
}

28
AssemblyGenerator.h Normal file
View File

@@ -0,0 +1,28 @@
// AssemblyGenerator.h
#ifndef MINI_C_ASSEMBLY_GENERATOR_H
#define MINI_C_ASSEMBLY_GENERATOR_H
#include "ir.h"
#include <string>
#include <vector>
#include <unordered_map>
#include <ostream>
// Turns a list of IR quads into AArch64 assembly text.
class AssemblyGenerator {
public:
// Stores a reference to `quads` (no copy is made), so the vector must
// outlive this object. The stack layout is computed during construction.
explicit AssemblyGenerator(const std::vector<Quad> &quads);
// Returns the full assembly listing for the quad list.
std::string generate();
private:
// The IR being translated; held by reference.
const std::vector<Quad> &quads_;
// Source variable name -> positive offset; the slot is at [x29, #-offset].
std::unordered_map<std::string, int> localOffset_;
// Temporary name -> positive offset; the slot is at [x29, #-offset].
std::unordered_map<std::string, int> tempOffset_;
// Bytes reserved below the frame pointer, rounded up to a multiple of 16.
int stackSize_;
// Fills localOffset_, tempOffset_ and stackSize_ from quads_.
void allocateStack();
// Write the frame setup / teardown sequences.
void emitPrologue(std::ostream &os) const;
void emitEpilogue(std::ostream &os) const;
// Writes the assembly for a single quad.
void emitInstruction(const Quad &q, std::ostream &os) const;
// Load an operand into w9 / store w9 to a destination.
void loadOperand(const std::string &name, std::ostream &os) const;
void storeResult(const std::string &name, std::ostream &os) const;
};
#endif // MINI_C_ASSEMBLY_GENERATOR_H

169
InstructionEncoder.cpp Normal file
View File

@@ -0,0 +1,169 @@
// InstructionEncoder.cpp
#include "InstructionEncoder.h"
#include <cstdint>
// —————————————————————————————————————————————
// 1) 控制流
// Unconditional branch: B <label>
// `offset` is the byte distance to the target; it must be 4-byte aligned and
// fit in the signed 26-bit word offset (imm26).
uint32_t encodeB(int32_t offset) {
    const uint32_t kOpcode = 0x14000000u;                       // 0b000101 in bits [31:26]
    const uint32_t wordOffset = static_cast<uint32_t>(offset >> 2);
    return kOpcode | (wordOffset & 0x03ffffffu);
}
// Return: RET (fixed encoding, no operands taken)
uint32_t encodeRET() {
    const uint32_t kRet = 0xd65f03c0u;
    return kRet;
}
// No-operation: NOP (fixed encoding)
uint32_t encodeNOP() {
    const uint32_t kNop = 0xd503201fu;
    return kNop;
}
// —————————————————————————————————————————————
// 2) 〈Data Processing Immediate〉
// 32-bit ADD (immediate): add wd, wn, #imm
// Only the low 12 bits of `imm` are encoded; higher bits are discarded.
uint32_t encodeADDimm(uint8_t rd, uint8_t rn, uint16_t imm) {
    uint32_t word = 0x11000000u;                    // sf=0, op=0, S=0
    word |= (uint32_t(imm) & 0xfffu) << 10;         // imm12 -> bits [21:10]
    word |= uint32_t(rn) << 5;                      // Rn    -> bits [9:5]
    word |= uint32_t(rd);                           // Rd    -> bits [4:0]
    return word;
}
// 32-bit SUB (immediate): sub wd, wn, #imm
// Only the low 12 bits of `imm` are encoded; higher bits are discarded.
uint32_t encodeSUBimm(uint8_t rd, uint8_t rn, uint16_t imm) {
    uint32_t word = 0x51000000u;                    // sf=0, op=1, S=0
    word |= (uint32_t(imm) & 0xfffu) << 10;         // imm12 -> bits [21:10]
    word |= uint32_t(rn) << 5;                      // Rn    -> bits [9:5]
    word |= uint32_t(rd);                           // Rd    -> bits [4:0]
    return word;
}
// MOVZ (move wide immediate, 32-bit):
//   movz wd, #imm16, LSL #(highHalf ? 16 : 0)
uint32_t encodeMOVz(uint8_t rd, uint16_t imm16, bool highHalf) {
    const uint32_t kMovzW = 0x52800000u;                   // sf=0, opc=10
    const uint32_t halfwordSelect = highHalf ? (1u << 21) : 0u;   // hw field
    const uint32_t immField = (uint32_t(imm16) & 0xffffu) << 5;
    return kMovzW | halfwordSelect | immField | uint32_t(rd);
}
// CMP (immediate, 32-bit): cmp wn, #imm  ==  subs wzr, wn, #imm
// Only the low 12 bits of `imm` are encoded.
uint32_t encodeCMPimm(uint8_t rn, uint16_t imm) {
    const uint32_t kSubsImmW = 0x71000000u;   // sf=0, op=1, S=1
    const uint32_t kWzr = 31u;                // destination is the zero register
    const uint32_t immField = (uint32_t(imm) & 0xfffu) << 10;
    return kSubsImmW | immField | (uint32_t(rn) << 5) | kWzr;
}
// CSET wd, cond — sets wd to 1 when `cond` holds, otherwise 0.
//
// CSET is an alias of CSINC wd, WZR, WZR, invert(cond): when the inverted
// condition holds the result is WZR (0), otherwise WZR + 1 (1). The previous
// encoding used a base that is neither CSEL nor CSINC and did not invert the
// condition.
//
// `cond` is the condition the caller wants tested (0=eq, 1=ne, ...); the
// inversion (flipping the low bit) is applied here.
uint32_t encodeCSET(uint8_t rd, uint8_t cond) {
    const uint32_t kCsincW  = 0x1A800400u;             // 32-bit CSINC, bits [11:10] = 01
    const uint32_t kWzr     = 31u;
    const uint32_t inverted = (uint32_t(cond) ^ 1u) & 0xFu;
    return kCsincW
         | (kWzr << 16)        // Rm = WZR
         | (inverted << 12)    // inverted condition in bits [15:12]
         | (kWzr << 5)         // Rn = WZR
         | uint32_t(rd);
}
// —————————————————————————————————————————————
// 3) 〈Load/Store Unsigned Immediate〉
//
// STR Wt, [Xn, #imm] (32-bit 存 4-byteimm 必须 4 字节对齐)
// LDR Wt, [Xn, #imm] (32-bit 载 4-byteimm 必须 4 字节对齐)
// 立即数在编码中以 imm12<<2 形式出现
// STR wt, [xn, #offset] — 32-bit store.
//
// Non-negative, 4-byte-aligned offsets use the scaled unsigned-offset form
// (base 0xB9000000, imm12 = offset / 4). The previous base 0x39000000 is the
// byte store STRB.
// Negative or unaligned offsets cannot be expressed in that form, so they are
// encoded as STUR (unscaled, signed 9-bit offset), which is what an assembler
// selects for e.g. `str w9, [x29, #-8]`. Offsets outside [-256, 255] in that
// path are truncated to 9 bits.
uint32_t encodeSTR(uint8_t rt, uint8_t rn, int16_t offset) {
    const uint32_t regs = (uint32_t(rn) << 5) | uint32_t(rt);
    if (offset >= 0 && (offset & 3) == 0) {
        const uint32_t imm12 = (uint32_t(offset) >> 2) & 0xfffu;
        return 0xB9000000u | (imm12 << 10) | regs;
    }
    const uint32_t imm9 = uint32_t(offset) & 0x1ffu;
    return 0xB8000000u | (imm9 << 12) | regs;
}
// LDR wt, [xn, #offset] — 32-bit load.
//
// Non-negative, 4-byte-aligned offsets use the scaled unsigned-offset form
// (base 0xB9400000, imm12 = offset / 4). The previous base 0x39400000 is the
// byte load LDRB.
// Negative or unaligned offsets cannot be expressed in that form, so they are
// encoded as LDUR (unscaled, signed 9-bit offset), which is what an assembler
// selects for e.g. `ldr w9, [x29, #-8]`. Offsets outside [-256, 255] in that
// path are truncated to 9 bits.
uint32_t encodeLDR(uint8_t rt, uint8_t rn, int16_t offset) {
    const uint32_t regs = (uint32_t(rn) << 5) | uint32_t(rt);
    if (offset >= 0 && (offset & 3) == 0) {
        const uint32_t imm12 = (uint32_t(offset) >> 2) & 0xfffu;
        return 0xB9400000u | (imm12 << 10) | regs;
    }
    const uint32_t imm9 = uint32_t(offset) & 0x1ffu;
    return 0xB8400000u | (imm9 << 12) | regs;
}
// —————————————————————————————————————————————
// 4) 〈Load/Store Pair, Immediate〉 64-bit 版,用来存/取一对寄存器)
//
// STP rt1, rt2, [rn, #imm]!
// LDP rt1, rt2, [rn], #imm
//
// imm 必须 8 字节对齐,编码里以 imm7<<3 的形式出现
// STP xt1, xt2, [xn, #offset]! — 64-bit store pair, pre-indexed.
//
// A64 selects the addressing mode with bits [25:23] (011 = pre-index), giving
// the base 0xA9800000. imm7 occupies bits [21:15] and holds offset / 8 as a
// signed value, so no other flag may be OR-ed into that range. The previous
// code set ARM32-style P/U/W bits, which produced a store with no write-back
// and left SP unchanged.
uint32_t encodeSTP(uint8_t rt1, uint8_t rt2, uint8_t rn, int16_t offset) {
    const uint32_t kStpPreIndex64 = 0xA9800000u;
    // Conversion to uint32_t sign-extends first, so the low 7 bits after the
    // shift are the two's-complement imm7.
    const uint32_t imm7 = (uint32_t(offset) >> 3) & 0x7fu;
    return kStpPreIndex64
         | (imm7 << 15)
         | (uint32_t(rt2) << 10)
         | (uint32_t(rn) << 5)
         | uint32_t(rt1);
}
// LDP xt1, xt2, [xn], #offset — 64-bit load pair, post-indexed.
//
// A64 selects the addressing mode with bits [25:23] (001 = post-index) and
// the load with bit 22, giving the base 0xA8C00000. imm7 occupies bits
// [21:15] and holds offset / 8 as a signed value. The previous code set
// ARM32-style P/U/W bits, which produced a pre-indexed load with a corrupted
// (negative) offset.
uint32_t encodeLDP(uint8_t rt1, uint8_t rt2, uint8_t rn, int16_t offset) {
    const uint32_t kLdpPostIndex64 = 0xA8C00000u;
    // Conversion to uint32_t sign-extends first, so the low 7 bits after the
    // shift are the two's-complement imm7.
    const uint32_t imm7 = (uint32_t(offset) >> 3) & 0x7fu;
    return kLdpPostIndex64
         | (imm7 << 15)
         | (uint32_t(rt2) << 10)
         | (uint32_t(rn) << 5)
         | uint32_t(rt1);
}
// 5) Data-processing (register): add wd, wn, wm  (32-bit, no shift)
uint32_t encodeADDreg(uint8_t rd, uint8_t rn, uint8_t rm) {
    uint32_t word = 0x0B000000u;        // ADD (shifted register), sf=0
    word |= uint32_t(rm) << 16;         // Rm -> bits [20:16]
    word |= uint32_t(rn) << 5;          // Rn -> bits [9:5]
    word |= uint32_t(rd);               // Rd -> bits [4:0]
    return word;
}
// 6) Compare register: cmp wn, wm  ==  subs wzr, wn, wm
// 32-bit SUBS (shifted register, no shift) with WZR as destination:
//   base 0x6B000000, bits [20:16] = Rm, [9:5] = Rn, [4:0] = 31 (WZR)
uint32_t encodeCMPreg(uint8_t rn, uint8_t rm) {
    const uint32_t kSubsRegW = 0x6B000000u;
    const uint32_t kWzr = 31u;
    const uint32_t operands = (uint32_t(rm) << 16) | (uint32_t(rn) << 5);
    return kSubsRegW | operands | kWzr;
}
// 7) Conditional branch: B.<cond> offset
// `offset` is the byte distance to the target; it is stored as a 19-bit word
// offset in bits [23:5], with the condition code in bits [3:0] and 0x54 in
// bits [31:24].
uint32_t encodeBCond(uint8_t cond, int32_t offset) {
    const uint32_t kBCond = 0x54u << 24;
    const uint32_t wordOffset = (uint32_t(offset) >> 2) & 0x7FFFFu;
    const uint32_t condField = uint32_t(cond) & 0xFu;
    return kBCond | (wordOffset << 5) | condField;
}

61
InstructionEncoder.h Normal file
View File

@@ -0,0 +1,61 @@
// InstructionEncoder.h
#ifndef INSTRUCTION_ENCODER_H
#define INSTRUCTION_ENCODER_H
#include <cstdint>
// Unconditional branch: B <label>; `offset` is the byte distance to the target
uint32_t encodeB(int32_t offset);
// Return: RET
uint32_t encodeRET();
// No-operation: NOP
uint32_t encodeNOP();
// ADD (immediate): add rd, rn, #imm
uint32_t encodeADDimm(uint8_t rd, uint8_t rn, uint16_t imm);
// SUB (immediate): sub rd, rn, #imm
uint32_t encodeSUBimm(uint8_t rd, uint8_t rn, uint16_t imm);
// MOVZ (move wide immediate): movz rd, #imm16, {LSL #16 if highHalf}
uint32_t encodeMOVz(uint8_t rd, uint16_t imm16, bool highHalf);
// CMP immediate: cmp rn, #imm (encoded as subs wzr, rn, #imm)
uint32_t encodeCMPimm(uint8_t rn, uint16_t imm);
// CSET (condition set): cset rd, <cond>
// cond: 0=eq,1=ne,2=hs,3=lo,4=mi,5=pl,6=vs,7=vc,8=hi,9=ls,10=ge,11=lt,12=gt,13=le,14=al,15=nv
uint32_t encodeCSET(uint8_t rd, uint8_t cond);
// STP (pre-index): stp rt1, rt2, [rn, #offset]!
uint32_t encodeSTP(uint8_t rt1, uint8_t rt2, uint8_t rn, int16_t offset);
// LDP (post-index): ldp rt1, rt2, [rn], #offset
uint32_t encodeLDP(uint8_t rt1, uint8_t rt2, uint8_t rn, int16_t offset);
// STR (unsigned immediate): str rt, [rn, #offset]
uint32_t encodeSTR(uint8_t rt, uint8_t rn, int16_t offset);
// LDR (unsigned immediate): ldr rt, [rn, #offset]
uint32_t encodeLDR(uint8_t rt, uint8_t rn, int16_t offset);
// ADD sp, sp, #imm
// Uses the 64-bit form (sf=1, base 0x91000000) with Rd = Rn = 31 (SP).
// The 32-bit form would operate on WSP and clear the upper half of the stack
// pointer. Only the low 12 bits of `imm` are encoded.
inline uint32_t encodeADDsp(uint16_t imm) {
    return 0x910003FFu | ((uint32_t(imm) & 0xfffu) << 10);
}
// SUB sp, sp, #imm
// Uses the 64-bit form (sf=1, base 0xD1000000) with Rd = Rn = 31 (SP).
// The 32-bit form would operate on WSP and clear the upper half of the stack
// pointer. Only the low 12 bits of `imm` are encoded.
inline uint32_t encodeSUBsp(uint16_t imm) {
    return 0xD10003FFu | ((uint32_t(imm) & 0xfffu) << 10);
}
// ADD (register): add rd, rn, rm
uint32_t encodeADDreg(uint8_t rd, uint8_t rn, uint8_t rm);
// Compare (register): cmp rn, rm (encoded as subs wzr, rn, rm)
uint32_t encodeCMPreg(uint8_t rn, uint8_t rm);
// Conditional branch: B.<cond> imm; `offset` is the byte distance to the
// target and `cond` uses the same numbering as encodeCSET
uint32_t encodeBCond(uint8_t cond, int32_t offset);
#endif // INSTRUCTION_ENCODER_H

113
ObjectFileGenerator.cpp Normal file
View File

@@ -0,0 +1,113 @@
// ObjectFileGenerator.cpp
#include "ObjectFileGenerator.h"
#include <fstream>
#include <cstring>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <mach/vm_prot.h>
#include <iostream>
bool ObjectFileGenerator::writeObject(const std::string &filename,
const std::vector<uint32_t> &code) {
std::cout << "Code size: " << code.size() << std::endl;
for (auto instr : code) {
std::cout << std::hex << instr << std::endl; // 打印机器码
}
// 1) 计算各部分大小/偏移
const uint32_t segcmd_sz = sizeof(segment_command_64) + sizeof(section_64);
const uint32_t symcmd_sz = sizeof(symtab_command);
const uint32_t ncmds = 2; // __TEXT + SYMTAB
const uint32_t sizeofcmds = segcmd_sz + symcmd_sz;
const uint32_t fileoff_code = sizeof(mach_header_64) + sizeofcmds;
const uint32_t codesize = uint32_t(code.size() * 4);
// 字符串表: "\0_main\0"
std::vector<char> strtab = {'\0','_','m','a','i','n','\0'};
const uint32_t nsyms = 1;
const uint32_t symoff = fileoff_code + codesize;
std::cout << "Symbol offset: " << symoff << std::endl;
const uint32_t stroff = symoff + nsyms * sizeof(nlist_64);
const uint32_t strsize = uint32_t(strtab.size());
// 2) Mach-O Header
mach_header_64 mh{};
mh.magic = MH_MAGIC_64;
mh.cputype = CPU_TYPE_ARM64;
mh.cpusubtype = CPU_SUBTYPE_ARM64_ALL;
mh.filetype = MH_OBJECT;
mh.ncmds = ncmds;
mh.sizeofcmds = sizeofcmds;
mh.flags = 0;
mh.reserved = 0;
// 3) LC_SEGMENT_64 for __TEXT/__text
segment_command_64 sc{};
sc.cmd = LC_SEGMENT_64;
sc.cmdsize = segcmd_sz;
std::strncpy(sc.segname, "__TEXT", sizeof(sc.segname));
sc.vmaddr = 0;
sc.vmsize = codesize;
sc.fileoff = fileoff_code;
sc.filesize = codesize;
sc.maxprot = VM_PROT_READ | VM_PROT_WRITE;
sc.initprot = VM_PROT_READ | VM_PROT_WRITE;
sc.nsects = 1;
sc.flags = 0;
section_64 sect{};
std::strncpy(sect.sectname, "__text", sizeof(sect.sectname));
std::strncpy(sect.segname, "__TEXT", sizeof(sect.segname));
sect.addr = 0;
sect.size = codesize;
sect.offset = fileoff_code;
sect.align = 2; // 4-byte align
sect.reloff = 0;
sect.nreloc = 0;
sect.flags = 0;
sect.reserved1 = sect.reserved2 = sect.reserved3 = 0;
// 4) LC_SYMTAB
symtab_command stc{};
stc.cmd = LC_SYMTAB;
stc.cmdsize = symcmd_sz;
stc.symoff = symoff;
stc.nsyms = nsyms;
stc.stroff = stroff;
stc.strsize = strsize;
// 5) 写入 header + load commands + section
std::ofstream ofs(filename, std::ios::binary);
if (!ofs) return false;
ofs.write(reinterpret_cast<char*>(&mh), sizeof(mh));
ofs.write(reinterpret_cast<char*>(&sc), sizeof(segment_command_64));
ofs.write(reinterpret_cast<char*>(&sect), sizeof(section_64));
ofs.write(reinterpret_cast<char*>(&stc), sizeof(stc));
// 6) 写入机器码
ofs.seekp(fileoff_code, std::ios::beg);
for (auto instr : code) {
ofs.put(char(instr & 0xFF));
ofs.put(char((instr >> 8) & 0xFF));
ofs.put(char((instr >> 16) & 0xFF));
ofs.put(char((instr >> 24) & 0xFF));
}
// 7) 写入符号表 (_main)
ofs.seekp(symoff, std::ios::beg);
nlist_64 nl{};
nl.n_un.n_strx = 1; // "_main" 在 strtab 的偏移
nl.n_type = N_SECT | N_EXT; // 定义在 section、对外可见
nl.n_sect = 1; // 第一节 __text
nl.n_desc = 0;
nl.n_value = 0; // 从节首开始
ofs.write(reinterpret_cast<char*>(&nl), sizeof(nl));
// 8) 写入字符串表
ofs.seekp(stroff, std::ios::beg);
ofs.write(strtab.data(), strtab.size());
return true;
}

18
ObjectFileGenerator.h Normal file
View File

@@ -0,0 +1,18 @@
#ifndef OBJECT_FILE_GENERATOR_H
#define OBJECT_FILE_GENERATOR_H
#include <vector>
#include <cstdint>
#include <string>
/// Writes a stream of 32-bit ARM64 instructions as a Mach-O 64-bit
/// relocatable object (.o) file.
class ObjectFileGenerator {
public:
/// \param filename output file name, e.g. "out.o"
/// \param code each element is one 32-bit machine instruction; it is
///             written to the file in little-endian byte order
/// \return true on success, false on failure
static bool writeObject(const std::string &filename,
const std::vector<uint32_t> &code);
};
#endif // OBJECT_FILE_GENERATOR_H

411
PrettyPrinter.cpp Normal file
View File

@@ -0,0 +1,411 @@
// PrettyPrinter.cpp
#include "PrettyPrinter.h"
// Entry point: prints the tree rooted at `root` and flushes the output stream.
void PrettyPrinter::print(ASTNode* root) {
    printNode(root);
    out << std::flush;
}
// Writes the leading spaces for the current nesting depth.
void PrettyPrinter::indent(){
    const std::string padding(indentLevel * indentWidth, ' ');
    out << padding;
}
// Ends the current output line (no flush).
void PrettyPrinter::newline()
{
    out << "\n";
}
// Runs `f` with the indentation level raised by one, then lowers it again.
void PrettyPrinter::withIndent(const std::function<void()>& f){
    ++indentLevel;
    f();
    --indentLevel;
}
// Returns true for the node tags this printer treats as statements.
bool PrettyPrinter::isStmt(ASTTag t){
    switch (t) {
    case ASTTag::IF_STMT:
    case ASTTag::FOR_STMT:
    case ASTTag::FOR_DECL_STMT:
    case ASTTag::WHILE_STMT:
    case ASTTag::RETURN_STMT:
    case ASTTag::EXPR_STMT:
    case ASTTag::COMPOUND_STMT:
    case ASTTag::DECL_STMT:
        return true;
    default:
        return false;
    }
}
// Dispatches on the node tag to the matching printer. Null nodes are ignored.
// Tags without a dedicated case are printed as an expression, wrapped as an
// "expr;" line when isStmt() says the tag is a statement.
void PrettyPrinter::printNode(ASTNode* n){
    if (n == nullptr) return;

    // Writes "<indent><expr>;" and ends the line; a null `expr` yields ";".
    const auto exprLine = [this](ASTNode* expr) {
        indent();
        if (expr) printExpr(expr);
        out << ";";
        newline();
    };

    switch (n->tag) {
    case ASTTag::TRANSL_UNIT:   printTranslationUnit(n); return;
    case ASTTag::FUNCTION_DEF:  printFunctionDef(n);     return;
    case ASTTag::DECL_STMT:     printDeclStmt(n);        return;
    case ASTTag::IF_STMT:       printIfStmt(n);          return;
    case ASTTag::FOR_STMT:
    case ASTTag::FOR_DECL_STMT: printForStmt(n);         return;
    case ASTTag::WHILE_STMT:    printWhileStmt(n);       return;
    case ASTTag::RETURN_STMT:   printReturnStmt(n);      return;
    case ASTTag::COMPOUND_STMT: printCompoundStmt(n);    return;
    case ASTTag::DECLARATION:
        indent();
        printDeclaration(n);
        out << ";";
        newline();
        return;
    case ASTTag::EXPR_STMT:
        // kids[0] may be absent or null for an empty statement.
        exprLine(n->kids.empty() ? nullptr : n->kids[0]);
        return;
    default:
        break;
    }

    if (isStmt(n->tag)) {
        // Any other statement falls back to expression-statement handling.
        exprLine(n);
    } else {
        printExpr(n);
    }
}
// ── Top-level translation unit ──
// Prints every child, each followed by an empty line.
void PrettyPrinter::printTranslationUnit(ASTNode* n){
    for (std::size_t i = 0; i < n->kids.size(); ++i) {
        printNode(n->kids[i]);
        newline();
    }
}
// ── Function definition ──
// kids: [spec_list, declarator, (opt decl_list), compound_stmt]
void PrettyPrinter::printFunctionDef(ASTNode* n){
    ASTNode* specifiers = n->kids[0];
    ASTNode* declarator = n->kids[1];   // carries the function name and parameters
    ASTNode* body       = n->kids.back();

    indent();
    printDeclSpec(specifiers);
    out << " ";
    printDeclarator(declarator);
    out << " ";
    printCompoundStmt(body);
    newline();
}
// ── Declaration statement / declaration ──
// Prints the wrapped declaration (kids[0]) on its own indented line.
void PrettyPrinter::printDeclStmt(ASTNode* n){
    ASTNode* declaration = n->kids[0];
    indent();
    printDeclaration(declaration);
    out << ";";
    newline();
}
// Prints "specifiers [init-declarator-list]" without indentation or ';'.
// kids: [spec_list, init_decl_list]
void PrettyPrinter::printDeclaration(ASTNode* n) {
    printDeclSpec(n->kids[0]);

    // The init-declarator list is optional.
    const bool hasInitList = n->kids.size() > 1 && n->kids[1] != nullptr;
    if (!hasInitList) return;

    out << " ";
    printInitDeclList(n->kids[1]);
}
// ── If ──
// kids: [cond, then, (else)] — the else branch is printed only when there
// are exactly three children.
void PrettyPrinter::printIfStmt(ASTNode* n){
    // A compound branch is printed through printCompoundStmt; any other
    // statement goes on the next line, one level deeper.
    const auto printBranch = [this](ASTNode* branch) {
        if (branch->tag == ASTTag::COMPOUND_STMT) {
            printCompoundStmt(branch);
            return;
        }
        newline();
        withIndent([&]() { printNode(branch); });
    };

    indent();
    out << "if (";
    printExpr(n->kids[0]);
    out << ") ";
    printBranch(n->kids[1]);

    if (n->kids.size() == 3) {
        indent();
        out << "else ";
        printBranch(n->kids[2]);
    }
}
// ── For ──
// kids: [init, cond, iter?, body]
//
// The body is always the last child. The iter clause therefore only exists
// when there are four children; with three children kids[2] IS the body and
// must not be printed as the iter clause (it would otherwise appear twice).
void PrettyPrinter::printForStmt(ASTNode* n){
    // Clauses are often wrapped in an EXPR_STMT; print the inner expression.
    const auto unwrap = [](ASTNode* clause) -> ASTNode* {
        if (clause->tag == ASTTag::EXPR_STMT
            && !clause->kids.empty()
            && clause->kids[0])
            return clause->kids[0];
        return clause;
    };

    indent();
    out << "for (";

    // init: either a declaration or an (optionally wrapped) expression
    if (ASTNode* init = n->kids[0]) {
        if (init->tag == ASTTag::DECLARATION) {
            printDeclaration(init);
        } else {
            printExpr(unwrap(init));
        }
    }
    out << "; ";

    // cond: usually wrapped in an EXPR_STMT
    if (ASTNode* cond = n->kids[1]) {
        printExpr(unwrap(cond));
    }
    out << "; ";

    // iter: sometimes an EXPR_STMT, sometimes a bare expression
    if (n->kids.size() > 3 && n->kids[2]) {
        printExpr(unwrap(n->kids[2]));
    }
    out << ") ";

    ASTNode* body = n->kids.back();
    if (body->tag == ASTTag::COMPOUND_STMT) {
        printCompoundStmt(body);
    } else {
        newline();
        withIndent([&]() { printNode(body); });
    }
}
// ── While ──
// kids: [cond, body]
void PrettyPrinter::printWhileStmt(ASTNode* n){
    ASTNode* condition = n->kids[0];
    indent();
    out << "while (";
    printExpr(condition);
    out << ") ";

    ASTNode* body = n->kids[1];
    if (body->tag != ASTTag::COMPOUND_STMT) {
        // A single statement goes on the next line, one level deeper.
        newline();
        withIndent([&]() { printNode(body); });
        return;
    }
    printCompoundStmt(body);
}
// ── Return ──
// Prints "return;" or "return <expr>;". The value is optional: it is printed
// only when kids[0] exists and is non-null, because printExpr would otherwise
// be handed a null node.
void PrettyPrinter::printReturnStmt(ASTNode* n){
    indent();
    out << "return";
    if (!n->kids.empty() && n->kids[0]) {
        out << " ";
        printExpr(n->kids[0]);
    }
    out << ";";
    newline();
}
// ── Compound ──
// Prints "{", the block contents one level deeper, then "}".
// Note: the opening brace is preceded by indent() even when the caller has
// already written text on the current line.
void PrettyPrinter::printCompoundStmt(ASTNode* n) {
    indent();
    out << "{";
    newline();

    withIndent([this, n]() {
        ASTNode* head = n->kids.empty() ? nullptr : n->kids[0];

        // Fallback: kids[0] is not a BLOCK_ITEM_LIST, so print every child.
        if (head == nullptr || head->tag != ASTTag::BLOCK_ITEM_LIST) {
            for (ASTNode* child : n->kids) {
                if (child) printNode(child);
            }
            return;
        }

        for (ASTNode* item : head->kids) {
            if (item == nullptr) continue;

            if (item->tag == ASTTag::BLOCK_DECL) {
                // Declaration wrapper: unwrap and print as a line.
                indent();
                printDeclaration(item->kids[0]);
                out << ";";
                newline();
            } else if (item->tag == ASTTag::BLOCK_STMT) {
                // Statement wrapper: print the real statement inside, if any.
                if (!item->kids.empty() && item->kids[0]) {
                    printNode(item->kids[0]);
                }
            } else {
                // Anything else is printed directly.
                printNode(item);
            }
        }
    });

    indent();
    out << "}";
    newline();
}
// ── Declarators & types ──
// spec_list: the children are SPECIFIER leaves whose ival holds an enum value.
// Prints the specifiers separated by single spaces; null children and
// children with any other tag are skipped.
void PrettyPrinter::printDeclSpec(ASTNode* n){
    if (n == nullptr) return;

    bool needSeparator = false;
    for (ASTNode* leaf : n->kids) {
        const bool isSpecifier = leaf != nullptr && leaf->tag == ASTTag::SPECIFIER;
        if (!isSpecifier) continue;

        if (needSeparator) out << " ";
        needSeparator = true;

        const int spec = leaf->ival;
        if (spec == TS_INT) {
            out << "int";
        } else if (spec == TS_CHAR) {
            out << "char";
        } else if (spec == TS_VOID) {
            out << "void";
        } else {
            // Other specifiers are not spelled out yet; emit a marker.
            out << "/*spec" << spec << "*/";
        }
    }
}
// declarator: may contain pointers, an identifier, and a parameter list.
// Simplified handling: a node with non-empty text prints that text; otherwise
// the first child is followed, and a FUNC_DECL additionally prints its
// parenthesised parameter list.
void PrettyPrinter::printDeclarator(ASTNode* n){
    if (!n->text.empty()) {
        out << n->text;
        return;
    }
    if (n->kids.empty()) {
        return;
    }

    // FUNC_DECL: kids[0] is the DECLARATOR (with the name), kids[1] the PARAM_LIST.
    printDeclarator(n->kids[0]);
    if (n->tag == ASTTag::FUNC_DECL) {
        out << "(";
        if (n->kids.size() > 1) printParamList(n->kids[1]);
        out << ")";
    }
}
// Parameter list: prints the parameters separated by ", ".
// Each child is a PARAM_DECL: kids[0] is the spec_list, kids[1] an optional
// declarator.
void PrettyPrinter::printParamList(ASTNode* n){
    for (std::size_t i = 0; i < n->kids.size(); ++i) {
        ASTNode* param = n->kids[i];
        if (i != 0) out << ", ";

        printDeclSpec(param->kids[0]);
        if (param->kids.size() > 1) {
            out << " ";
            printDeclarator(param->kids[1]);
        }
    }
}
// ── Expression printing ──
// Prints the expression rooted at `n`; a null node prints nothing.
//
// Unary operators are stored as character codes in ival. Besides '-' and '+',
// the prefix operators '!', '~', '*' and '&' are now printed as well;
// previously they were dropped, so `!x` was printed as `x`.
// NOTE(review): BINARY prints ival as a single character and ASSIGN always
// prints " = " regardless of ival — presumably multi-character operators need
// separate handling; confirm against the parser's operator codes.
void PrettyPrinter::printExpr(ASTNode* n){
    using T = ASTTag;
    if (!n) return;
    switch (n->tag){
    case T::ID:
        out<<n->text; break;
    case T::CONST:
        out<<n->text; break;
    case T::BINARY:
        printExpr(n->kids[0]);
        out<<" "<< static_cast<char>(n->ival) <<" ";
        printExpr(n->kids[1]);
        break;
    case T::UNARY:{
        const char* s = "";
        switch (n->ival) {
        case '-': s = "-"; break;
        case '+': s = "+"; break;
        case '!': s = "!"; break;
        case '~': s = "~"; break;
        case '*': s = "*"; break;
        case '&': s = "&"; break;
        default:  break;   // unknown operator code: print the operand only
        }
        printUnary(n, s, true);
        break;
    }
    case T::ASSIGN:
        printExpr(n->kids[0]);
        out<<" = ";
        printExpr(n->kids[1]);
        break;
    case T::FUNC_CALL:
        printFuncCall(n);
        break;
    case T::ARRAY_REF:
        printArrayRef(n);
        break;
    case T::STRUCT_REF:
        printStructRef(n);
        break;
    case T::INIT_EXPR:
        // Unwrap the initializer and print the real expression inside.
        if (!n->kids.empty() && n->kids[0])
            printExpr(n->kids[0]);
        break;
    default:
        out<<"/*unhandled:"<<(int)n->tag<<"*/";
    }
}
// Prints a unary expression: the operator text goes before the operand when
// `prefix` is true and after it otherwise.
void PrettyPrinter::printUnary(ASTNode* n, const char* op, bool prefix){
    ASTNode* operand = n->kids[0];
    if (prefix) {
        out << op;
        printExpr(operand);
    } else {
        printExpr(operand);
        out << op;
    }
}
// Prints "callee(arg, arg, ...)".
// kids[0] is the callee; kids[1], when present, holds the arguments. An
// ARG_LIST node is expanded into its children separated by ", " — printExpr
// has no case for ARG_LIST and would print an "unhandled" marker instead.
// Any other node in kids[1] is printed as a single expression.
void PrettyPrinter::printFuncCall(ASTNode* n){
    printExpr(n->kids[0]);
    out << "(";
    if (n->kids.size() > 1 && n->kids[1]) {
        ASTNode* args = n->kids[1];
        if (args->tag == ASTTag::ARG_LIST) {
            bool first = true;
            for (ASTNode* arg : args->kids) {
                if (!arg) continue;   // lists may contain null entries
                if (!first) out << ", ";
                first = false;
                printExpr(arg);
            }
        } else {
            printExpr(args);
        }
    }
    out << ")";
}
// Prints "array[index]"; kids: [array, index].
void PrettyPrinter::printArrayRef(ASTNode* n){
    ASTNode* array = n->kids[0];
    ASTNode* index = n->kids[1];

    printExpr(array);
    out << "[";
    printExpr(index);
    out << "]";
}
// Prints "base.field" or "base->field"; `flag` selects the arrow form and
// `text` holds the field name.
void PrettyPrinter::printStructRef(ASTNode* n){
    printExpr(n->kids[0]);
    if (n->flag) {
        out << "->";
    } else {
        out << ".";
    }
    out << n->text;
}
// Prints an init-declarator list as "decl [= init], decl [= init], ...".
// Null list entries are skipped. Each entry has kids[0] = declarator and an
// optional kids[1] = initializer.
void PrettyPrinter::printInitDeclList(ASTNode* n) {
    if (n == nullptr) return;

    bool needComma = false;
    for (ASTNode* entry : n->kids) {
        if (entry == nullptr) continue;

        if (needComma) out << ", ";
        needComma = true;

        // declarator
        printDeclarator(entry->kids[0]);

        // optional initializer
        ASTNode* init = entry->kids.size() > 1 ? entry->kids[1] : nullptr;
        if (init == nullptr) continue;

        out << " = ";
        // An INIT_EXPR wrapper is unwrapped before printing.
        const bool wrapped = init->tag == ASTTag::INIT_EXPR
                          && !init->kids.empty()
                          && init->kids[0] != nullptr;
        printExpr(wrapped ? init->kids[0] : init);
    }
}

57
PrettyPrinter.h Normal file
View File

@@ -0,0 +1,57 @@
// PrettyPrinter.h
#ifndef PRETTY_PRINTER_H
#define PRETTY_PRINTER_H
#include "ast.h"
#include <ostream>
#include <functional>
// Prints an AST back out as indented source text on an output stream.
class PrettyPrinter {
public:
// `os` is stored by reference and receives all output; `indentWidth` is the
// number of spaces per nesting level.
explicit PrettyPrinter(std::ostream& os, int indentWidth = 4)
: out(os), indentWidth(indentWidth) {}
// Prints the tree rooted at `root` and flushes the stream.
void print(ASTNode* root);
private:
std::ostream& out;
// Current nesting depth; starts at 0.
int indentLevel = 0;
// Spaces written per nesting level.
int indentWidth;
// Writes indentLevel * indentWidth spaces.
void indent();
// Writes a line break.
void newline();
// Runs `f` with indentLevel raised by one.
void withIndent(const std::function<void()>& f);
// Dispatches on the node tag.
void printNode(ASTNode* n);
// Statements & translation unit
void printTranslationUnit(ASTNode* n);
void printFunctionDef(ASTNode* n);
void printDeclStmt(ASTNode* n);
void printDeclaration(ASTNode* n);
void printIfStmt(ASTNode* n);
void printForStmt(ASTNode* n);
void printWhileStmt(ASTNode* n);
void printReturnStmt(ASTNode* n);
void printCompoundStmt(ASTNode* n);
void printInitDeclList(ASTNode* n);
// Expressions
void printExpr(ASTNode* n);
// NOTE(review): no definition of printBinary is visible in the accompanying
// .cpp — confirm whether it is defined elsewhere or can be removed.
void printBinary(ASTNode* n, const char* op);
void printUnary(ASTNode* n, const char* op, bool prefix=true);
void printFuncCall(ASTNode* n);
void printArrayRef(ASTNode* n);
void printStructRef(ASTNode* n);
// Declarations & types
void printDeclSpec(ASTNode* n);
void printDeclarator(ASTNode* n);
void printParamList(ASTNode* n);
// Helpers
bool isStmt(ASTTag tag);
};
#endif // PRETTY_PRINTER_H

809
ast.cpp Normal file
View File

@@ -0,0 +1,809 @@
#include "ast.h"
#include <cstdlib> // strtoll
#include <cstring>
/* ──────────────────────────
* 内部小工具
* ──────────────────────────*/
namespace {
// Allocates a bare node with the given tag.
inline ASTNode* make(ASTTag tag) {
    return new ASTNode(tag);
}

// Appends `item` to a list node.
// If `list_or_node` already is a list with tag `tag`, the item is pushed onto
// it and the same node is returned. Otherwise a new list node is created
// whose children are `list_or_node` followed by `item`. Note that a null
// `list_or_node` is stored as a null first child in that case.
ASTNode* append_to_list(ASTTag tag, ASTNode* list_or_node, ASTNode* item) {
    const bool alreadyList = list_or_node != nullptr && list_or_node->tag == tag;
    if (!alreadyList) {
        ASTNode* fresh = make(tag);
        fresh->kids.reserve(4);
        fresh->kids.push_back(list_or_node);
        fresh->kids.push_back(item);
        return fresh;
    }
    list_or_node->kids.push_back(item);
    return list_or_node;
}
}
/* ──────────────────────────
* 基本原子节点
* ──────────────────────────*/
// Creates an ID node whose text is the identifier name.
ASTNode* new_id_node(const char* name, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::ID);
    node->text = name;
    node->loc = loc;
    return node;
}
// Creates a CONST node. The literal's spelling is kept in `text`, and its
// integer value (parsed with base auto-detection) is stored in `ival`.
ASTNode* new_const_node(const char* literal, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::CONST);
    node->loc = loc;
    node->text = literal;
    // The end-of-parse position is not needed, so no end pointer is requested.
    node->ival = std::strtoll(literal, nullptr, 0);
    return node;
}
// Creates a STRING node whose text is the literal as given.
ASTNode* new_string_node(const char* literal, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::STRING);
    node->text = literal;
    node->loc = loc;
    return node;
}
/* ──────────────────────────
* 后缀、调用、成员访问
* ──────────────────────────*/
// Creates an ARRAY_REF node; kids: [array, index].
ASTNode* new_array_ref_node(ASTNode* array, ASTNode* index, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::ARRAY_REF);
    node->loc = loc;
    node->kids.push_back(array);
    node->kids.push_back(index);
    return node;
}
// Creates a FUNC_CALL node; kids: [callee] plus the argument list when one
// is supplied (a null `arg_list` adds no child).
ASTNode* new_func_call_node(ASTNode* callee, ASTNode* arg_list, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::FUNC_CALL);
    node->loc = loc;
    node->kids.push_back(callee);
    if (arg_list != nullptr) {
        node->kids.push_back(arg_list);
    }
    return node;
}
// Creates a STRUCT_REF node; kids: [base], text = field name, and
// flag = true when the access uses the pointer operator.
ASTNode* new_struct_ref_node(ASTNode* base, const char* field, bool is_ptr_op, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::STRUCT_REF);
    node->loc = loc;
    node->flag = is_ptr_op;
    node->text = field;
    node->kids.push_back(base);
    return node;
}
/* ──────────────────────────
* 自增 / 复合字面量
* ──────────────────────────*/
// Creates a POST_INC node wrapping `expr`.
ASTNode* new_post_inc_node(ASTNode* expr, SourceLoc loc) {
    ASTNode* node = make(ASTTag::POST_INC);
    node->loc = loc;
    node->kids.push_back(expr);
    return node;
}
// Creates a POST_DEC node wrapping `expr`.
ASTNode* new_post_dec_node(ASTNode* expr, SourceLoc loc) {
    ASTNode* node = make(ASTTag::POST_DEC);
    node->loc = loc;
    node->kids.push_back(expr);
    return node;
}
// Creates a PRE_INC node wrapping `expr`.
ASTNode* new_pre_inc_node (ASTNode* expr, SourceLoc loc) {
    ASTNode* node = make(ASTTag::PRE_INC);
    node->loc = loc;
    node->kids.push_back(expr);
    return node;
}
// Creates a PRE_DEC node wrapping `expr`.
ASTNode* new_pre_dec_node (ASTNode* expr, SourceLoc loc) {
    ASTNode* node = make(ASTTag::PRE_DEC);
    node->loc = loc;
    node->kids.push_back(expr);
    return node;
}
// Creates a COMPOUND_LITERAL node; kids: [type_name, init_list].
ASTNode* new_compound_literal_node(ASTNode* type_name, ASTNode* init_list, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::COMPOUND_LITERAL);
    node->loc = loc;
    node->kids.push_back(type_name);
    node->kids.push_back(init_list);
    return node;
}
/* ──────────────────────────
* 参数 / 实参与逗号表达式
* ──────────────────────────*/
// Starts an ARG_LIST containing a single argument.
ASTNode* new_arg_list(ASTNode* first_arg, SourceLoc loc)
{
    ASTNode* list = make(ASTTag::ARG_LIST);
    list->loc = loc;
    list->kids.push_back(first_arg);
    return list;
}
// Appends `arg` to an ARG_LIST (see append_to_list for the non-list case)
// and stamps the resulting list with `loc`.
ASTNode* append_arg_list(ASTNode* list, ASTNode* arg, SourceLoc loc)
{
    ASTNode* result = append_to_list(ASTTag::ARG_LIST, list, arg);
    result->loc = loc;
    return result;
}
// Combines two expressions into a list and stamps it with `loc`.
// The list is tagged ARG_LIST, the same tag used for call arguments.
ASTNode* new_expr_list(ASTNode* left, ASTNode* right, SourceLoc loc)
{
    ASTNode* result = append_to_list(ASTTag::ARG_LIST, left, right);
    result->loc = loc;
    return result;
}
/* ──────────────────────────
* 一元 / 二元 / 条件 / 逻辑 / 赋值
* ──────────────────────────*/
// Creates a UNARY node; ival = operator code, kids: [operand].
ASTNode* new_unary_op_node(int op, ASTNode* operand, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::UNARY);
    node->ival = op;
    node->loc = loc;
    node->kids.push_back(operand);
    return node;
}
// Creates a SIZEOF_EXPR node; kids: [target], flag = true when the target is
// a type name rather than an expression.
ASTNode* new_sizeof_node(ASTNode* target, bool is_type_name, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::SIZEOF_EXPR);
    node->loc = loc;
    node->flag = is_type_name;
    node->kids.push_back(target);
    return node;
}
// Creates a CAST_EXPR node; kids: [type_name, expr].
ASTNode* new_cast_node(ASTNode* type_name, ASTNode* expr, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::CAST_EXPR);
    node->loc = loc;
    node->kids.push_back(type_name);
    node->kids.push_back(expr);
    return node;
}
// Creates a BINARY node; ival = operator code, kids: [lhs, rhs].
ASTNode* new_binop_node(int op, ASTNode* lhs, ASTNode* rhs, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::BINARY);
    node->ival = op;
    node->loc = loc;
    node->kids.push_back(lhs);
    node->kids.push_back(rhs);
    return node;
}
// Creates a LOGIC_AND node; kids: [lhs, rhs].
ASTNode* new_logical_and_node(ASTNode* lhs, ASTNode* rhs, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::LOGIC_AND);
    node->loc = loc;
    node->kids.push_back(lhs);
    node->kids.push_back(rhs);
    return node;
}
// Creates a LOGIC_OR node; kids: [lhs, rhs].
ASTNode* new_logical_or_node(ASTNode* lhs, ASTNode* rhs, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::LOGIC_OR);
    node->loc = loc;
    node->kids.push_back(lhs);
    node->kids.push_back(rhs);
    return node;
}
// Creates a COND node; kids: [cond, then_expr, else_expr].
ASTNode* new_conditional_node(ASTNode* cond, ASTNode* then_expr, ASTNode* else_expr, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::COND);
    node->loc = loc;
    node->kids.push_back(cond);
    node->kids.push_back(then_expr);
    node->kids.push_back(else_expr);
    return node;
}
// Creates an ASSIGN node; ival = assignment operator code, kids: [lhs, rhs].
ASTNode* new_assign_node(ASTNode* lhs, int op, ASTNode* rhs, SourceLoc loc)
{
    ASTNode* node = make(ASTTag::ASSIGN);
    node->ival = op;
    node->loc = loc;
    node->kids.push_back(lhs);
    node->kids.push_back(rhs);
    return node;
}
/* ──────────────────────────
* 4. 声明 & 类型系统
* ──────────────────────────*/
/*
 * Wraps `enum_val` in a SPECIFIER leaf (value in `ival`) and passes it to
 * append_to_list with a null list and tag `list_tag`.
 */
static ASTNode* new_leaf(ASTTag list_tag, int enum_val, SourceLoc loc)
{
    ASTNode* specifier = make(ASTTag::SPECIFIER);
    specifier->loc = loc;
    specifier->ival = enum_val;
    return append_to_list(list_tag, nullptr, specifier);
}
/*—— 声明与说明符 ——*/
/* Starts a SPEC_LIST from a single specifier enum value (delegates to new_leaf). */
ASTNode* new_spec_list(int spec, SourceLoc loc)
{
    ASTNode* const specs = new_leaf(ASTTag::SPEC_LIST, spec, loc);
    return specs;
}
/*
 * Appends a specifier enum value to the SPEC_LIST `list`.
 *
 * The SPECIFIER leaf is created directly. The previous implementation built a
 * throwaway one-element list via new_leaf() only to take kids[0] from it, and
 * that wrapper node was never used again.
 */
ASTNode* append_spec_list(ASTNode* list, int spec, SourceLoc loc)
{
    auto* leaf = make(ASTTag::SPECIFIER);
    leaf->ival = spec;
    leaf->loc = loc;
    return append_to_list(ASTTag::SPEC_LIST, list, leaf);
}
/* Builds a DECLARATION node: children are {spec_list} plus `init_list` when it is non-null. */
ASTNode* new_declaration(ASTNode* spec_list, ASTNode* init_list, SourceLoc loc)
{
    ASTNode* decl = make(ASTTag::DECLARATION);
    decl->loc = loc;
    decl->kids = {spec_list};
    if (init_list != nullptr) {
        decl->kids.push_back(init_list);
    }
    return decl;
}
/*—— initdeclarator 列表 ——*/
/* Builds an INIT_DECL node: children are {declarator} plus `initializer` when it is non-null. */
ASTNode* new_init_decl(ASTNode* declarator, ASTNode* initializer, SourceLoc loc)
{
    ASTNode* init_decl = make(ASTTag::INIT_DECL);
    init_decl->loc = loc;
    init_decl->kids = {declarator};
    if (initializer != nullptr) {
        init_decl->kids.push_back(initializer);
    }
    return init_decl;
}
/* Starts an INIT_DECL_LIST with `init_decl` (append_to_list with a null list) and sets its location. */
ASTNode* new_init_list(ASTNode* init_decl, SourceLoc loc)
{
    ASTNode* const decls = append_to_list(ASTTag::INIT_DECL_LIST, nullptr, init_decl);
    decls->loc = loc;
    return decls;
}
/* Appends `init_decl` to the INIT_DECL_LIST `list` and overwrites the result's location with `loc`. */
ASTNode* append_init_list(ASTNode* list, ASTNode* init_decl, SourceLoc loc)
{
    ASTNode* const decls = append_to_list(ASTTag::INIT_DECL_LIST, list, init_decl);
    decls->loc = loc;
    return decls;
}
/*—— struct / union / enum ——*/
/*
 * Builds an SU_SPEC node: `ival` holds the struct/union kind, `text` the tag
 * name when `id` is non-null, and `decl_list` becomes a child when non-null.
 */
ASTNode* new_struct_su_node(StructUnionKind su, const char* id, ASTNode* decl_list, SourceLoc loc)
{
    ASTNode* spec = make(ASTTag::SU_SPEC);
    spec->loc = loc;
    spec->ival = su;
    if (id != nullptr) {
        spec->text = id;
    }
    if (decl_list != nullptr) {
        spec->kids.push_back(decl_list);
    }
    return spec;
}
/* Starts a STRUCT_DECL_LIST with `sdecl` and sets its location. */
ASTNode* new_sdecl_list(ASTNode* sdecl, SourceLoc loc)
{
    ASTNode* const members = append_to_list(ASTTag::STRUCT_DECL_LIST, nullptr, sdecl);
    members->loc = loc;
    return members;
}
/* Appends `sdecl` to the STRUCT_DECL_LIST `list` and overwrites the result's location. */
ASTNode* append_sdecl_list(ASTNode* list, ASTNode* sdecl, SourceLoc loc)
{
    ASTNode* const members = append_to_list(ASTTag::STRUCT_DECL_LIST, list, sdecl);
    members->loc = loc;
    return members;
}
/* Builds a STRUCT_DECL node with children {specq_list, sdecl_list}. */
ASTNode* new_struct_decl(ASTNode* specq_list, ASTNode* sdecl_list, SourceLoc loc)
{
    ASTNode* member = make(ASTTag::STRUCT_DECL);
    member->loc = loc;
    member->kids = {specq_list, sdecl_list};
    return member;
}
/* Starts a SPECQ_LIST from a single specifier/qualifier enum value (delegates to new_leaf). */
ASTNode* new_specq_list(int sq, SourceLoc loc)
{
    ASTNode* const specqs = new_leaf(ASTTag::SPECQ_LIST, sq, loc);
    return specqs;
}
/*
 * Appends a specifier/qualifier enum value to the SPECQ_LIST `list`.
 *
 * The SPECIFIER leaf is created directly. The previous implementation built a
 * throwaway one-element list via new_leaf() only to take kids[0] from it, and
 * that wrapper node was never used again.
 */
ASTNode* append_specq_list(ASTNode* list, int sq, SourceLoc loc)
{
    auto* leaf = make(ASTTag::SPECIFIER);
    leaf->ival = sq;
    leaf->loc = loc;
    return append_to_list(ASTTag::SPECQ_LIST, list, leaf);
}
/* Starts a STRUCT_DECLARATOR_LIST with `sdec` and sets its location. */
ASTNode* new_sdeclarator_list(ASTNode* sdec, SourceLoc loc)
{
    ASTNode* const declarators = append_to_list(ASTTag::STRUCT_DECLARATOR_LIST, nullptr, sdec);
    declarators->loc = loc;
    return declarators;
}
/* Appends `sdec` to the STRUCT_DECLARATOR_LIST `list` and overwrites the result's location. */
ASTNode* append_sdeclarator_list(ASTNode* list, ASTNode* sdec, SourceLoc loc)
{
    ASTNode* const declarators = append_to_list(ASTTag::STRUCT_DECLARATOR_LIST, list, sdec);
    declarators->loc = loc;
    return declarators;
}
/*
 * Builds a BITFIELD node. Children are {declarator, width} when a declarator
 * is given, otherwise just {width}.
 */
ASTNode* new_bitfield_node(ASTNode* declarator, ASTNode* width, SourceLoc loc)
{
    ASTNode* bitfield = make(ASTTag::BITFIELD);
    bitfield->loc = loc;
    if (declarator != nullptr) {
        bitfield->kids.push_back(declarator);
    }
    bitfield->kids.push_back(width);
    return bitfield;
}
/*
 * Builds an ENUM_SPEC node: `text` holds the tag name when `id` is non-null,
 * and `enumerator_list` becomes a child when non-null.
 */
ASTNode* new_enum_node(const char* id, ASTNode* enumerator_list, SourceLoc loc)
{
    ASTNode* spec = make(ASTTag::ENUM_SPEC);
    if (id != nullptr) {
        spec->text = id;
    }
    if (enumerator_list != nullptr) {
        spec->kids.push_back(enumerator_list);
    }
    spec->loc = loc;
    return spec;
}
/*
 * Builds an ENUM_CONST node named `id`; the optional value expression `val`
 * becomes its only child when non-null. `id` is assigned unconditionally, so
 * it must not be null.
 */
ASTNode* new_enum_const(const char* id, ASTNode* val, SourceLoc loc)
{
    ASTNode* enumerator = make(ASTTag::ENUM_CONST);
    enumerator->loc = loc;
    enumerator->text = id;
    if (val != nullptr) {
        enumerator->kids.push_back(val);
    }
    return enumerator;
}
/* Starts an ENUM_LIST with enumerator `e` and sets its location. */
ASTNode* new_enum_list(ASTNode* e, SourceLoc loc)
{
    ASTNode* const enumerators = append_to_list(ASTTag::ENUM_LIST, nullptr, e);
    enumerators->loc = loc;
    return enumerators;
}
/* Appends enumerator `e` to the ENUM_LIST `list` and overwrites the result's location. */
ASTNode* append_enum_list(ASTNode* list, ASTNode* e, SourceLoc loc)
{
    ASTNode* const enumerators = append_to_list(ASTTag::ENUM_LIST, list, e);
    enumerators->loc = loc;
    return enumerators;
}
/*—— 类型限定符列表 ——*/
/* Starts a TQ_LIST from a single type-qualifier enum value (delegates to new_leaf). */
ASTNode* new_tq_list(int tq, SourceLoc loc)
{
    ASTNode* const qualifiers = new_leaf(ASTTag::TQ_LIST, tq, loc);
    return qualifiers;
}
/*
 * Appends a type-qualifier enum value to the TQ_LIST `list`.
 *
 * The SPECIFIER leaf is created directly. The previous implementation built a
 * throwaway one-element list via new_leaf() only to take kids[0] from it, and
 * that wrapper node was never used again.
 */
ASTNode* append_tq_list(ASTNode* list, int tq, SourceLoc loc)
{
    auto* leaf = make(ASTTag::SPECIFIER);
    leaf->ival = tq;
    leaf->loc = loc;
    return append_to_list(ASTTag::TQ_LIST, list, leaf);
}
/*—— declarator / pointer / array / func ——*/
/*
 * Builds a DECLARATOR node. Children are {pointer, direct_decl} when a pointer
 * is given, otherwise just {direct_decl}; the direct declarator is always last.
 */
ASTNode* new_declarator_node(ASTNode* pointer, ASTNode* direct_decl, SourceLoc loc)
{
    ASTNode* declarator = make(ASTTag::DECLARATOR);
    declarator->loc = loc;
    if (pointer != nullptr) {
        declarator->kids.push_back(pointer);
    }
    declarator->kids.push_back(direct_decl);
    return declarator;
}
/*
 * Builds a childless DECLARATOR node whose `text` is the declared identifier.
 * `id` is assigned unconditionally, so it must not be null.
 */
ASTNode* new_decl_ident(const char* id, SourceLoc loc)
{
    ASTNode* ident = make(ASTTag::DECLARATOR);
    ident->loc = loc;
    ident->text = id;
    return ident;
}
/*
 * Builds an ARRAY_DECL node. Children are {decl}, followed by `tq_list` and
 * `size_expr` in that order, each only when non-null.
 * `flag` is set when either `is_static` or `is_star` is true; the two are not
 * distinguished afterwards.
 */
ASTNode* new_array_decl(ASTNode* decl, ASTNode* tq_list,
                        ASTNode* size_expr, bool is_static, bool is_star, SourceLoc loc)
{
    ASTNode* array = make(ASTTag::ARRAY_DECL);
    array->loc = loc;
    array->kids = {decl};
    if (tq_list != nullptr) {
        array->kids.push_back(tq_list);
    }
    if (size_expr != nullptr) {
        array->kids.push_back(size_expr);
    }
    array->flag = is_static || is_star;
    return array;
}
/* Builds a FUNC_DECL node: children are {decl} plus `param_type_list` when it is non-null. */
ASTNode* new_func_decl(ASTNode* decl, ASTNode* param_type_list, SourceLoc loc)
{
    ASTNode* func = make(ASTTag::FUNC_DECL);
    func->loc = loc;
    func->kids = {decl};
    if (param_type_list != nullptr) {
        func->kids.push_back(param_type_list);
    }
    return func;
}
/* Builds an OLD_FUNC_DECL node with children {decl, id_list}. */
ASTNode* new_oldstyle_func_decl(ASTNode* decl, ASTNode* id_list, SourceLoc loc)
{
    ASTNode* func = make(ASTTag::OLD_FUNC_DECL);
    func->loc = loc;
    func->kids = {decl, id_list};
    return func;
}
/* Builds a POINTER node; `tq_list` becomes its child when non-null. */
ASTNode* new_pointer(ASTNode* tq_list, SourceLoc loc)
{
    ASTNode* ptr = make(ASTTag::POINTER);
    if (tq_list != nullptr) {
        ptr->kids.push_back(tq_list);
    }
    ptr->loc = loc;
    return ptr;
}
/*
 * Builds a new POINTER node whose children are the optional `tq_list`
 * followed by the already-built pointer node `existing`.
 */
ASTNode* prepend_pointer(ASTNode* tq_list, ASTNode* existing, SourceLoc loc)
{
    ASTNode* outer = make(ASTTag::POINTER);
    if (tq_list != nullptr) {
        outer->kids.push_back(tq_list);
    }
    outer->kids.push_back(existing);
    outer->loc = loc;
    return outer;
}
/*—— 参数列表 & 声明 ——*/
/* Builds a PARAM_DECL node: children are {decl_spec} plus `declarator` when it is non-null. */
ASTNode* new_param_decl(ASTNode* decl_spec, ASTNode* declarator, SourceLoc loc)
{
    ASTNode* param = make(ASTTag::PARAM_DECL);
    param->loc = loc;
    param->kids = {decl_spec};
    if (declarator != nullptr) {
        param->kids.push_back(declarator);
    }
    return param;
}
/* Starts a PARAM_LIST with parameter `p` and sets its location. */
ASTNode* new_param_list(ASTNode* p, SourceLoc loc)
{
    ASTNode* const params = append_to_list(ASTTag::PARAM_LIST, nullptr, p);
    params->loc = loc;
    return params;
}
/* Appends parameter `p` to the PARAM_LIST `list` and overwrites the result's location. */
ASTNode* append_param_list(ASTNode* list, ASTNode* p, SourceLoc loc)
{
    ASTNode* const params = append_to_list(ASTTag::PARAM_LIST, list, p);
    params->loc = loc;
    return params;
}
/*
 * Marks a parameter list as variadic by appending a PARAM_LIST_ELIPS marker
 * node to `list`. Only the marker receives `loc`; the list's own location is
 * left untouched here.
 */
ASTNode* new_param_list_ellipsis(ASTNode* list, SourceLoc loc)
{
    ASTNode* marker = make(ASTTag::PARAM_LIST_ELIPS);
    marker->loc = loc;
    ASTNode* const params = append_to_list(ASTTag::PARAM_LIST, list, marker);
    return params;
}
/*—— 标识符列表 ——*/
/* Starts an ID_LIST holding one identifier node created by new_id_node(id, loc). */
ASTNode* new_id_list(const char* id, SourceLoc loc)
{
    ASTNode* const ident = new_id_node(id, loc);
    return append_to_list(ASTTag::ID_LIST, nullptr, ident);
}
/* Appends an identifier node created by new_id_node(id, loc) to the ID_LIST `list`. */
ASTNode* append_id_list(ASTNode* list, const char* id, SourceLoc loc)
{
    ASTNode* const ident = new_id_node(id, loc);
    return append_to_list(ASTTag::ID_LIST, list, ident);
}
/*—— typename / 抽象声明符 ——*/
/* Builds a TYPE_NAME_NODE: children are {specq_list} plus `abs_decl` when it is non-null. */
ASTNode* new_type_name(ASTNode* specq_list, ASTNode* abs_decl, SourceLoc loc)
{
    ASTNode* type_name = make(ASTTag::TYPE_NAME_NODE);
    type_name->loc = loc;
    type_name->kids = {specq_list};
    if (abs_decl != nullptr) {
        type_name->kids.push_back(abs_decl);
    }
    return type_name;
}
/*
 * Builds an ABS_DECL node; `pointer` and `direct_abs_decl` are added as
 * children in that order, each only when non-null.
 */
ASTNode* new_abs_decl(ASTNode* pointer, ASTNode* direct_abs_decl, SourceLoc loc)
{
    ASTNode* abstract = make(ASTTag::ABS_DECL);
    if (pointer != nullptr) {
        abstract->kids.push_back(pointer);
    }
    if (direct_abs_decl != nullptr) {
        abstract->kids.push_back(direct_abs_decl);
    }
    abstract->loc = loc;
    return abstract;
}
/* Builds an ABS_ARRAY node: optional `size` child, `flag` stores `is_star`. */
ASTNode* new_abs_array(ASTNode* size, bool is_star, SourceLoc loc)
{
    ASTNode* array = make(ASTTag::ABS_ARRAY);
    array->flag = is_star;
    if (size != nullptr) {
        array->kids.push_back(size);
    }
    array->loc = loc;
    return array;
}
/*
 * Creates an ABS_ARRAY node via new_abs_array and attaches it to `parent`
 * through append_to_list under the ABS_ARRAY_CHILD tag; the result gets `loc`.
 */
ASTNode* new_abs_array_child(ASTNode* parent, ASTNode* size, bool is_star, SourceLoc loc)
{
    ASTNode* const array = new_abs_array(size, is_star, loc);
    ASTNode* const chain = append_to_list(ASTTag::ABS_ARRAY_CHILD, parent, array);
    chain->loc = loc;
    return chain;
}
/* Builds an ABS_FUNC node; `param_list` becomes its child when non-null. */
ASTNode* new_abs_func(ASTNode* param_list, SourceLoc loc)
{
    ASTNode* func = make(ASTTag::ABS_FUNC);
    if (param_list != nullptr) {
        func->kids.push_back(param_list);
    }
    func->loc = loc;
    return func;
}
/*
 * Creates an ABS_FUNC node via new_abs_func and attaches it to `parent`
 * through append_to_list under the ABS_FUNC_CHILD tag; the result gets `loc`.
 */
ASTNode* new_abs_func_child(ASTNode* parent, ASTNode* param_list, SourceLoc loc)
{
    ASTNode* const func = new_abs_func(param_list, loc);
    ASTNode* const chain = append_to_list(ASTTag::ABS_FUNC_CHILD, parent, func);
    chain->loc = loc;
    return chain;
}
/* ──────────────────────────
* 5. 初始化 & 设计化初始化
* ──────────────────────────*/
/* Builds an INIT_EXPR node wrapping the initializer expression `expr`. */
ASTNode* new_init_expr(ASTNode* expr, SourceLoc loc)
{
    ASTNode* init = make(ASTTag::INIT_EXPR);
    init->loc = loc;
    init->kids = {expr};
    return init;
}
/* Starts an INIT_ITEM_LIST with `init` and sets its location. */
ASTNode* new_init_item_list(ASTNode* init, SourceLoc loc)
{
    ASTNode* const items = append_to_list(ASTTag::INIT_ITEM_LIST, nullptr, init);
    items->loc = loc;
    return items;
}
/* Appends `init` to the INIT_ITEM_LIST `list` and overwrites the result's location. */
ASTNode* append_init_item(ASTNode* list, ASTNode* init, SourceLoc loc)
{
    ASTNode* const items = append_to_list(ASTTag::INIT_ITEM_LIST, list, init);
    items->loc = loc;
    return items;
}
/* Builds an INIT_LIST_NODE whose only child is `item_list`. */
ASTNode* new_init_list_node(ASTNode* item_list, SourceLoc loc)
{
    ASTNode* braces = make(ASTTag::INIT_LIST_NODE);
    braces->loc = loc;
    braces->kids = {item_list};
    return braces;
}
/*── 指定化初始化器 ──*/
/* Starts a DESIGNATOR_LIST with designator `d` and sets its location. */
ASTNode* new_designator_list(ASTNode* d, SourceLoc loc)
{
    ASTNode* const designators = append_to_list(ASTTag::DESIGNATOR_LIST, nullptr, d);
    designators->loc = loc;
    return designators;
}
/* Appends designator `d` to the DESIGNATOR_LIST `list` and overwrites the result's location. */
ASTNode* append_designator_list(ASTNode* list, ASTNode* d, SourceLoc loc)
{
    ASTNode* const designators = append_to_list(ASTTag::DESIGNATOR_LIST, list, d);
    designators->loc = loc;
    return designators;
}
/* Builds an ARRAY_DESIGNATOR node whose only child is the index expression `const_expr`. */
ASTNode* new_array_designator(ASTNode* const_expr, SourceLoc loc)
{
    ASTNode* designator = make(ASTTag::ARRAY_DESIGNATOR);
    designator->loc = loc;
    designator->kids = {const_expr};
    return designator;
}
/*
 * Builds a FIELD_DESIGNATOR node whose `text` is the field name.
 * `field` is assigned unconditionally, so it must not be null.
 */
ASTNode* new_field_designator(const char* field, SourceLoc loc)
{
    ASTNode* designator = make(ASTTag::FIELD_DESIGNATOR);
    designator->loc = loc;
    designator->text = field;
    return designator;
}
/* Builds a DESIGNATED_INIT node with children {designator_list, initializer}. */
ASTNode* new_designated_init(ASTNode* designator_list, ASTNode* initializer, SourceLoc loc)
{
    ASTNode* designated = make(ASTTag::DESIGNATED_INIT);
    designated->loc = loc;
    designated->kids = {designator_list, initializer};
    return designated;
}
/* ──────────────────────────
* 6. 语句
* ──────────────────────────*/
/*
 * Builds a LABELED_ID_STMT node: `text` is the label name, the labelled
 * statement is the only child. `id` must not be null.
 */
ASTNode* new_labeled_stmt_id(const char* id, ASTNode* stmt, SourceLoc loc)
{
    ASTNode* labeled = make(ASTTag::LABELED_ID_STMT);
    labeled->loc = loc;
    labeled->text = id;
    labeled->kids = {stmt};
    return labeled;
}
/* Builds a CASE_STMT node with children {expr, stmt}. */
ASTNode* new_case_stmt(ASTNode* expr, ASTNode* stmt, SourceLoc loc)
{
    ASTNode* case_node = make(ASTTag::CASE_STMT);
    case_node->loc = loc;
    case_node->kids = {expr, stmt};
    return case_node;
}
/* Builds a DEFAULT_STMT node whose only child is `stmt`. */
ASTNode* new_default_stmt(ASTNode* stmt, SourceLoc loc)
{
    ASTNode* default_node = make(ASTTag::DEFAULT_STMT);
    default_node->loc = loc;
    default_node->kids = {stmt};
    return default_node;
}
/* Builds a COMPOUND_STMT node; `block_item_list` becomes its child when non-null. */
ASTNode* new_compound_stmt(ASTNode* block_item_list, SourceLoc loc)
{
    ASTNode* block = make(ASTTag::COMPOUND_STMT);
    if (block_item_list != nullptr) {
        block->kids.push_back(block_item_list);
    }
    block->loc = loc;
    return block;
}
/* Starts a BLOCK_ITEM_LIST with `item` and sets its location. */
ASTNode* new_block_item_list(ASTNode* item, SourceLoc loc)
{
    ASTNode* const items = append_to_list(ASTTag::BLOCK_ITEM_LIST, nullptr, item);
    items->loc = loc;
    return items;
}
/* Appends `item` to the BLOCK_ITEM_LIST `list` and overwrites the result's location. */
ASTNode* append_block_item_list(ASTNode* list, ASTNode* item, SourceLoc loc)
{
    ASTNode* const items = append_to_list(ASTTag::BLOCK_ITEM_LIST, list, item);
    items->loc = loc;
    return items;
}
/* Builds a BLOCK_DECL node wrapping the declaration `decl`. */
ASTNode* new_block_decl(ASTNode* decl, SourceLoc loc)
{
    ASTNode* item = make(ASTTag::BLOCK_DECL);
    item->loc = loc;
    item->kids = {decl};
    return item;
}
/* Builds a BLOCK_STMT node wrapping the statement `stmt`. */
ASTNode* new_block_stmt(ASTNode* stmt, SourceLoc loc)
{
    ASTNode* item = make(ASTTag::BLOCK_STMT);
    item->loc = loc;
    item->kids = {stmt};
    return item;
}
/*
 * Builds an EXPR_STMT node. `expr` may be null (an empty statement `;`), in
 * which case the node has no children.
 *
 * The source location is recorded in both cases; previously the empty
 * statement was returned without `loc`, so it kept the default location.
 */
ASTNode* new_expr_stmt(ASTNode* expr, SourceLoc loc)
{
    auto* stmt = make(ASTTag::EXPR_STMT);
    stmt->loc = loc;
    if (expr) {
        stmt->kids = {expr};
    }
    return stmt;
}
/* Builds an IF_STMT node: children are {cond, then_s} plus `else_s` when it is non-null. */
ASTNode* new_if_stmt(ASTNode* cond, ASTNode* then_s, ASTNode* else_s, SourceLoc loc)
{
    ASTNode* branch = make(ASTTag::IF_STMT);
    branch->kids = {cond, then_s};
    if (else_s != nullptr) {
        branch->kids.push_back(else_s);
    }
    branch->loc = loc;
    return branch;
}
/* Builds a SWITCH_STMT node with children {expr, body}. */
ASTNode* new_switch_stmt(ASTNode* expr, ASTNode* body, SourceLoc loc)
{
    ASTNode* sw = make(ASTTag::SWITCH_STMT);
    sw->loc = loc;
    sw->kids = {expr, body};
    return sw;
}
/* Builds a WHILE_STMT node with children {cond, body}. */
ASTNode* new_while_stmt(ASTNode* cond, ASTNode* body, SourceLoc loc)
{
    ASTNode* loop = make(ASTTag::WHILE_STMT);
    loop->loc = loc;
    loop->kids = {cond, body};
    return loop;
}
/* Builds a DO_WHILE_STMT node with children {body, cond} (body first). */
ASTNode* new_do_while_stmt(ASTNode* body, ASTNode* cond, SourceLoc loc)
{
    ASTNode* loop = make(ASTTag::DO_WHILE_STMT);
    loop->loc = loc;
    loop->kids = {body, cond};
    return loop;
}
/*
 * Builds a FOR_STMT node. Children are {init, cond, iter, body}; `iter` is
 * omitted when null, so the body is always the last child.
 */
ASTNode* new_for_stmt(ASTNode* init, ASTNode* cond, ASTNode* iter, ASTNode* body, SourceLoc loc)
{
    ASTNode* loop = make(ASTTag::FOR_STMT);
    loop->loc = loc;
    loop->kids = {init, cond};
    if (iter != nullptr) {
        loop->kids.push_back(iter);
    }
    loop->kids.push_back(body);
    return loop;
}
/*
 * Builds a FOR_DECL_STMT node (for-loop whose first clause is a declaration).
 * Children are {decl, cond, iter, body}; `iter` is omitted when null, so the
 * body is always the last child.
 */
ASTNode* new_for_decl_stmt(ASTNode* decl, ASTNode* cond, ASTNode* iter, ASTNode* body, SourceLoc loc)
{
    ASTNode* loop = make(ASTTag::FOR_DECL_STMT);
    loop->loc = loc;
    loop->kids = {decl, cond};
    if (iter != nullptr) {
        loop->kids.push_back(iter);
    }
    loop->kids.push_back(body);
    return loop;
}
/* Builds a GOTO_STMT node whose `text` is the target label. `id` must not be null. */
ASTNode* new_goto_stmt(const char* id, SourceLoc loc)
{
    ASTNode* jump = make(ASTTag::GOTO_STMT);
    jump->loc = loc;
    jump->text = id;
    return jump;
}
/* Builds a childless CONTINUE_STMT node at `loc`. */
ASTNode* new_continue_stmt(SourceLoc loc)
{
    ASTNode* stmt = make(ASTTag::CONTINUE_STMT);
    stmt->loc = loc;
    return stmt;
}
/* Builds a childless BREAK_STMT node at `loc`. */
ASTNode* new_break_stmt(SourceLoc loc)
{
    ASTNode* stmt = make(ASTTag::BREAK_STMT);
    stmt->loc = loc;
    return stmt;
}
/* Builds a RETURN_STMT node; the returned expression is its only child when `expr` is non-null. */
ASTNode* new_return_stmt(ASTNode* expr, SourceLoc loc)
{
    ASTNode* ret = make(ASTTag::RETURN_STMT);
    if (expr != nullptr) {
        ret->kids = {expr};
    }
    ret->loc = loc;
    return ret;
}
/* ──────────────────────────
* 7. 翻译单元 / 顶层
* ──────────────────────────*/
/*
 * Grows the translation unit by one external declaration.
 * With an existing unit `prev`, `ext_decl` is appended and `prev` is returned
 * (its location is not changed). Otherwise a new TRANSL_UNIT node is created
 * with `ext_decl` as its first child and `loc` as its location.
 */
ASTNode* new_translation_unit(ASTNode* prev, ASTNode* ext_decl, SourceLoc loc)
{
    if (prev != nullptr) {
        prev->kids.push_back(ext_decl);
        return prev;
    }

    ASTNode* unit = make(ASTTag::TRANSL_UNIT);
    unit->loc = loc;
    unit->kids = {ext_decl};
    return unit;
}
/* Builds a DECL_STMT node wrapping `declaration`. */
ASTNode* new_decl_stmt(ASTNode* declaration, SourceLoc loc)
{
    ASTNode* stmt = make(ASTTag::DECL_STMT);
    stmt->kids = {declaration};
    stmt->loc = loc;
    return stmt;
}
/*
 * Builds a FUNCTION_DEF node. Children are {decl_spec, declarator, decl_list,
 * comp_stmt}; `decl_list` is omitted when null, so the body is always last.
 */
ASTNode* new_function_def(ASTNode* decl_spec, ASTNode* declarator,
                          ASTNode* decl_list, ASTNode* comp_stmt, SourceLoc loc)
{
    ASTNode* func = make(ASTTag::FUNCTION_DEF);
    func->kids = {decl_spec, declarator};
    if (decl_list != nullptr) {
        func->kids.push_back(decl_list);
    }
    func->kids.push_back(comp_stmt);
    func->loc = loc;
    return func;
}
/* Starts a DECL_LIST with `decl` and sets its location. */
ASTNode* new_declaration_list(ASTNode* decl, SourceLoc loc)
{
    ASTNode* const decls = append_to_list(ASTTag::DECL_LIST, nullptr, decl);
    decls->loc = loc;
    return decls;
}
/* Appends `decl` to the DECL_LIST `list` and overwrites the result's location. */
ASTNode* append_declaration_list(ASTNode* list, ASTNode* decl, SourceLoc loc)
{
    ASTNode* const decls = append_to_list(ASTTag::DECL_LIST, list, decl);
    decls->loc = loc;
    return decls;
}
/*───────── overload: enum ↔ node ─────────*/
/* Node overload: starts a SPEC_LIST from an already-built specifier node. */
ASTNode* new_spec_list(ASTNode* spec_node, SourceLoc loc)
{
    ASTNode* const specs = append_to_list(ASTTag::SPEC_LIST, nullptr, spec_node);
    specs->loc = loc;
    return specs;
}
/* Node overload: appends an already-built specifier node to the SPEC_LIST `list`. */
ASTNode* append_spec_list(ASTNode* list, ASTNode* spec_node, SourceLoc loc)
{
    ASTNode* const specs = append_to_list(ASTTag::SPEC_LIST, list, spec_node);
    specs->loc = loc;
    return specs;
}
/* 设计化初始化: 追加 (designator = initializer) */
/*
 * Wraps (designator_list = initializer) in a DESIGNATED_INIT node and appends
 * it to the initializer item list `list`.
 */
ASTNode* append_designated_init(ASTNode* list,
                                ASTNode* designator_list,
                                ASTNode* initializer, SourceLoc loc)
{
    return append_init_item(list,
                            new_designated_init(designator_list, initializer, loc),
                            loc);
}
/* 若你用到了 specq_list同理加 overload */
/* Node overload: starts a SPECQ_LIST from an already-built specifier/qualifier node. */
ASTNode* new_specq_list(ASTNode* node, SourceLoc loc)
{
    ASTNode* const specqs = append_to_list(ASTTag::SPECQ_LIST, nullptr, node);
    specqs->loc = loc;
    return specqs;
}
/* Node overload: appends an already-built specifier/qualifier node to the SPECQ_LIST `list`. */
ASTNode* append_specq_list(ASTNode* list, ASTNode* node, SourceLoc loc)
{
    ASTNode* const specqs = append_to_list(ASTTag::SPECQ_LIST, list, node);
    specqs->loc = loc;
    return specqs;
}
/* ── spec_list overload (node) ───────────────────────────*/
// ASTNode* new_spec_list(ASTNode* spec_node) {
// return append_to_list(ASTTag::SPEC_LIST, nullptr, spec_node);
// }
// ASTNode* append_spec_list(ASTNode* list, ASTNode* spec_node) {
// return append_to_list(ASTTag::SPEC_LIST, list, spec_node);
// }
//
// /* ── specq_list overload (node) ─────────────────────────*/
// ASTNode* new_specq_list(ASTNode* node) {
// return append_to_list(ASTTag::SPECQ_LIST, nullptr, node);
// }
// ASTNode* append_specq_list(ASTNode* list, ASTNode* node) {
// return append_to_list(ASTTag::SPECQ_LIST, list, node);
// }
//
// /* ── 追加指定化初始化器 ───────────────────────────────*/
// ASTNode* append_designated_init(ASTNode* list,
// ASTNode* designator_list,
// ASTNode* initializer)
// {
// return append_init_item(list,
// new_designated_init(designator_list, initializer));
// }

325
ast.h Normal file
View File

@@ -0,0 +1,325 @@
#ifndef MINI_C_AST_H
#define MINI_C_AST_H
#include <string>
#include <vector>
#include <cstdint>
/*──────────────────────────────
* 0. Semantic-analysis support
*─────────────────────────────*/
// Forward declaration only; per the original note the concrete Type class is defined in type.h.
struct Type;
// Records the line and column of an AST node in the source file.
// Both fields default to 0.
struct SourceLoc {
int line = 0;
int col = 0;
};
/*──────────────────────────────
* 1. Lexer / parser level enums
*─────────────────────────────*/
// Unary operator codes.
enum UnaryOp : int {
op_address, op_deref, op_unary_plus, op_neg,
op_bitnot, op_not
};
// Assignment operator codes (plain and compound assignment).
enum AssignOp : int {
op_assign, op_mul_assign, op_div_assign, op_mod_assign,
op_add_assign, op_sub_assign, op_shl_assign, op_shr_assign,
op_and_assign, op_xor_assign, op_or_assign
};
// Additional binary operator codes for multi-character operators.
enum BinaryOp : int {
// Single-character (ASCII) operators are represented directly by their
// character literal; these codes start at 256 so they cannot collide.
SHL = 256, SHR,
LE, GE, EQ, NE
};
// Storage-class specifiers.
enum StorageClass : int {
SC_TYPEDEF, SC_EXTERN, SC_STATIC, SC_AUTO, SC_REGISTER
};
// Basic type specifiers.
enum TypeSpecifier : int {
TS_VOID, TS_CHAR, TS_SHORT, TS_INT, TS_LONG,
TS_FLOAT, TS_DOUBLE, TS_SIGNED, TS_UNSIGNED,
TS_BOOL, TS_COMPLEX, TS_IMAGINARY,
TS_TYPE_NAME // typedef alias
};
// Distinguishes struct from union; new_struct_su_node stores this in ASTNode::ival.
enum StructUnionKind : int { SU_STRUCT, SU_UNION };
// Type qualifiers (the function specifier enum follows on the next line).
enum TypeQualifier : int { TQ_CONST, TQ_RESTRICT, TQ_VOLATILE };
// Function specifiers.
enum FuncSpecifier : int { FS_INLINE };
/*──────────────────────────────
* 2. AST nodes and tags
*─────────────────────────────*/
// Discriminator stored in ASTNode::tag; identifies what kind of syntax a node represents.
enum class ASTTag : std::uint16_t {
/*── primary / operator expressions ──*/
ID, CONST, STRING,
UNARY, BINARY, LOGIC_AND, LOGIC_OR,
COND, ASSIGN,
ARRAY_REF, STRUCT_REF, FUNC_CALL,
SIZEOF_EXPR, CAST_EXPR, COMPOUND_LITERAL,
EXPR_LIST, ARG_LIST,
PRE_INC, PRE_DEC, // newly added
POST_INC, POST_DEC, // newly added
/*── declarations & types ──*/
SPECIFIER, SPEC_LIST, DECLARATION, INIT_DECL, INIT_DECL_LIST,
DECL_SPEC_LIST, DECLARATOR, POINTER_DECL,
ARRAY_DECL, FUNC_DECL, OLD_FUNC_DECL,
PARAM_DECL, PARAM_LIST, PARAM_LIST_ELIPS,
ID_LIST, TYPE_NAME_NODE,
ABS_DECL, ABS_ARRAY, ABS_ARRAY_CHILD,
ABS_FUNC, ABS_FUNC_CHILD,
POINTER, POINTER_CHAIN,
TQ_LIST,
/*── struct / enum ──*/
SU_SPEC, STRUCT_DECL, STRUCT_DECL_LIST,
STRUCT_DECLARATOR, STRUCT_DECLARATOR_LIST,
BITFIELD,
ENUM_SPEC, ENUM_LIST, ENUM_CONST,
SPECQ_LIST,
/*── initializers / designated initializers ──*/
INIT_EXPR, INIT_LIST_NODE, INIT_ITEM_LIST,
DESIGNATOR_LIST, DESIGNATED_INIT,
ARRAY_DESIGNATOR, FIELD_DESIGNATOR,
/*── statements ──*/
LABELED_ID_STMT, CASE_STMT, DEFAULT_STMT,
COMPOUND_STMT, BLOCK_ITEM_LIST, BLOCK_DECL, BLOCK_STMT,
EXPR_STMT,
IF_STMT, SWITCH_STMT,
WHILE_STMT, DO_WHILE_STMT,
FOR_STMT, FOR_DECL_STMT,
GOTO_STMT, CONTINUE_STMT, BREAK_STMT, RETURN_STMT,
/*── top level ──*/
TRANSL_UNIT, DECL_STMT, FUNCTION_DEF, DECL_LIST
};
// A single node of the syntax tree. Which of the payload fields (text, ival,
// flag, kids) are meaningful depends on `tag`; see the new_* constructor
// functions for what each node kind stores.
struct ASTNode {
ASTTag tag;
std::string text; // identifier / name text
std::int64_t ival{}; // numeric value or enum/operator code
bool flag{}; // node-specific boolean (e.g. set by new_sizeof_node, new_array_decl)
SourceLoc loc{}; // source line/column information
Type* type{}; // type pointer, filled in by semantic analysis
std::vector<ASTNode*> kids; // child nodes, in the order the parser actions added them
// Creates a node with the given tag and optional location; all other fields keep their defaults.
explicit ASTNode(ASTTag t, SourceLoc loc_ = SourceLoc{})
: tag(t), loc(loc_) {}
};
/* Global handle so that other translation units can reach the whole syntax tree. */
extern ASTNode *ast_root;
/*──────────────────────────────
* 3. Expression constructors
*    Each function returns a new node; `loc` is the source position to record.
*─────────────────────────────*/
ASTNode* new_id_node (const char* name, SourceLoc loc);
ASTNode* new_const_node(const char* literal, SourceLoc loc);
ASTNode* new_string_node(const char* literal, SourceLoc loc);
ASTNode* new_array_ref_node(ASTNode* array, ASTNode* index, SourceLoc loc);
ASTNode* new_func_call_node(ASTNode* callee, ASTNode* arg_list, SourceLoc loc);
ASTNode* new_struct_ref_node(ASTNode* base, const char* field, bool is_ptr_op, SourceLoc loc);
ASTNode* new_post_inc_node(ASTNode* expr, SourceLoc loc);
ASTNode* new_post_dec_node(ASTNode* expr, SourceLoc loc);
ASTNode* new_pre_inc_node(ASTNode* expr, SourceLoc loc);
ASTNode* new_pre_dec_node(ASTNode* expr, SourceLoc loc);
ASTNode* new_compound_literal_node(ASTNode* type_name, ASTNode* init_list, SourceLoc loc);
ASTNode* new_arg_list(ASTNode* first_arg, SourceLoc loc);
ASTNode* append_arg_list(ASTNode* list, ASTNode* arg, SourceLoc loc);
ASTNode* new_unary_op_node(int op, ASTNode* operand, SourceLoc loc);
ASTNode* new_sizeof_node(ASTNode* target, bool is_type_name, SourceLoc loc);
ASTNode* new_cast_node(ASTNode* type_name, ASTNode* expr, SourceLoc loc);
ASTNode* new_binop_node (int op, ASTNode* lhs, ASTNode* rhs, SourceLoc loc); // op may be a character literal or a BinaryOp value
ASTNode* new_logical_and_node(ASTNode* lhs, ASTNode* rhs, SourceLoc loc);
ASTNode* new_logical_or_node(ASTNode* lhs, ASTNode* rhs, SourceLoc loc);
ASTNode* new_conditional_node(ASTNode* cond, ASTNode* then_expr, ASTNode* else_expr, SourceLoc loc);
ASTNode* new_assign_node (ASTNode* lhs, int op, ASTNode* rhs, SourceLoc loc);
ASTNode* new_expr_list(ASTNode* left, ASTNode* right, SourceLoc loc);
/*──────────────────────────────
* 4. Declarations & type system
*    Parameters marked nullable may be passed as nullptr; the corresponding
*    child is then omitted from the node.
*─────────────────────────────*/
ASTNode* new_declaration(ASTNode* spec_list, ASTNode* init_list, SourceLoc loc);
ASTNode* new_spec_list(int spec, SourceLoc loc); // creates the list with its first element
ASTNode* append_spec_list(ASTNode* list, int spec, SourceLoc loc); // appends to an existing list
ASTNode* new_init_list(ASTNode* init_decl, SourceLoc loc);
ASTNode* append_init_list(ASTNode* list, ASTNode* init_decl, SourceLoc loc);
ASTNode* new_init_decl(ASTNode* declarator, ASTNode* initializer, SourceLoc loc);
ASTNode* new_struct_su_node(StructUnionKind su, const char* id, ASTNode* decl_list, SourceLoc loc);
ASTNode* new_sdecl_list(ASTNode* sdecl, SourceLoc loc);
ASTNode* append_sdecl_list(ASTNode* list, ASTNode* sdecl, SourceLoc loc);
ASTNode* new_struct_decl(ASTNode* specq_list, ASTNode* sdecl_list, SourceLoc loc);
ASTNode* new_specq_list(int spec_or_qual, SourceLoc loc);
ASTNode* append_specq_list(ASTNode* list, int spec_or_qual, SourceLoc loc);
ASTNode* new_sdeclarator_list(ASTNode* sdecltor, SourceLoc loc);
ASTNode* append_sdeclarator_list(ASTNode* list, ASTNode* sdecltor, SourceLoc loc);
ASTNode* new_bitfield_node(ASTNode* declarator /*nullable*/,
ASTNode* width_expr, SourceLoc loc);
ASTNode* new_enum_node(const char* id /*nullable*/, ASTNode* enumerator_list, SourceLoc loc);
ASTNode* new_enum_list(ASTNode* enumerator, SourceLoc loc);
ASTNode* append_enum_list(ASTNode* list, ASTNode* enumerator, SourceLoc loc);
ASTNode* new_enum_const(const char* id, ASTNode* value_expr /*nullable*/, SourceLoc loc);
ASTNode* new_tq_list(int tq, SourceLoc loc);
ASTNode* append_tq_list(ASTNode* list, int tq, SourceLoc loc);
ASTNode* new_declarator_node(ASTNode* pointer /*nullable*/, ASTNode* direct_decl, SourceLoc loc);
ASTNode* new_decl_ident(const char* id, SourceLoc loc);
ASTNode* new_array_decl(ASTNode* decl, ASTNode* tq_list /*nullable*/,
ASTNode* size_expr /*nullable*/,
bool is_static /*plain flag, not nullable*/,
bool is_star /*plain flag, not nullable*/, SourceLoc loc);
ASTNode* new_func_decl(ASTNode* decl, ASTNode* param_type_list /*nullable*/, SourceLoc loc);
ASTNode* new_oldstyle_func_decl(ASTNode* decl, ASTNode* id_list, SourceLoc loc);
ASTNode* new_pointer(ASTNode* tq_list /*nullable*/, SourceLoc loc);
ASTNode* prepend_pointer(ASTNode* tq_list /*nullable*/, ASTNode* existing, SourceLoc loc);
ASTNode* new_param_list(ASTNode* param_decl, SourceLoc loc);
ASTNode* append_param_list(ASTNode* list, ASTNode* param_decl, SourceLoc loc);
ASTNode* new_param_list_ellipsis(ASTNode* list, SourceLoc loc);
ASTNode* new_param_decl(ASTNode* decl_spec, ASTNode* declarator /*nullable*/, SourceLoc loc);
ASTNode* new_id_list(const char* id, SourceLoc loc);
ASTNode* append_id_list(ASTNode* list, const char* id, SourceLoc loc);
ASTNode* new_type_name(ASTNode* specq_list, ASTNode* abs_decl /*nullable*/, SourceLoc loc);
ASTNode* new_abs_decl(ASTNode* pointer /*nullable*/, ASTNode* direct_abs_decl /*nullable*/, SourceLoc loc);
ASTNode* new_abs_array(ASTNode* size_expr /*nullable*/, bool is_star /*plain flag*/, SourceLoc loc);
ASTNode* new_abs_array_child(ASTNode* parent, ASTNode* size_expr /*nullable*/, bool is_star /*plain flag*/, SourceLoc loc);
ASTNode* new_abs_func(ASTNode* param_type_list /*nullable*/, SourceLoc loc);
ASTNode* new_abs_func_child(ASTNode* parent, ASTNode* param_type_list /*nullable*/, SourceLoc loc);
/*──────────────────────────────
* 5. Initializers & designated initializers
*─────────────────────────────*/
ASTNode* new_init_expr(ASTNode* expr, SourceLoc loc);
ASTNode* new_init_list_node(ASTNode* init_item_list, SourceLoc loc);
ASTNode* new_init_item_list(ASTNode* initializer, SourceLoc loc);
ASTNode* append_init_item(ASTNode* list, ASTNode* initializer, SourceLoc loc);
ASTNode* new_designated_init(ASTNode* designator_list, ASTNode* initializer, SourceLoc loc);
ASTNode* new_designator_list(ASTNode* designator, SourceLoc loc);
ASTNode* append_designator_list(ASTNode* list, ASTNode* designator, SourceLoc loc);
ASTNode* new_array_designator(ASTNode* const_expr, SourceLoc loc);
ASTNode* new_field_designator(const char* field, SourceLoc loc);
/*──────────────────────────────
* 6. Statements
*    Parameters marked nullable may be passed as nullptr.
*─────────────────────────────*/
ASTNode* new_labeled_stmt_id(const char* id, ASTNode* stmt, SourceLoc loc);
ASTNode* new_case_stmt(ASTNode* const_expr, ASTNode* stmt, SourceLoc loc);
ASTNode* new_default_stmt(ASTNode* stmt, SourceLoc loc);
ASTNode* new_compound_stmt(ASTNode* block_item_list /*nullable*/, SourceLoc loc);
ASTNode* new_block_item_list(ASTNode* item, SourceLoc loc);
ASTNode* append_block_item_list(ASTNode* list, ASTNode* item, SourceLoc loc);
ASTNode* new_block_decl(ASTNode* decl, SourceLoc loc);
ASTNode* new_block_stmt(ASTNode* stmt, SourceLoc loc);
ASTNode* new_expr_stmt(ASTNode* expr /*nullable*/, SourceLoc loc);
ASTNode* new_if_stmt(ASTNode* cond, ASTNode* then_stmt, ASTNode* else_stmt /*nullable*/, SourceLoc loc);
ASTNode* new_switch_stmt(ASTNode* expr, ASTNode* stmt, SourceLoc loc);
ASTNode* new_while_stmt(ASTNode* cond, ASTNode* body, SourceLoc loc);
ASTNode* new_do_while_stmt(ASTNode* body, ASTNode* cond, SourceLoc loc);
ASTNode* new_for_stmt(ASTNode* init, ASTNode* cond, ASTNode* iter /*nullable*/, ASTNode* body, SourceLoc loc);
ASTNode* new_for_decl_stmt(ASTNode* decl, ASTNode* cond_expr_stmt,
ASTNode* iter_expr /*nullable*/, ASTNode* body, SourceLoc loc);
ASTNode* new_goto_stmt(const char* id, SourceLoc loc);
ASTNode* new_continue_stmt(SourceLoc loc);
ASTNode* new_break_stmt(SourceLoc loc);
ASTNode* new_return_stmt (ASTNode* expr /*nullable*/, SourceLoc loc);
/*──────────────────────────────
* 7. Translation unit / top level
*─────────────────────────────*/
// With prev == nullptr a new translation unit is created; otherwise ext_decl is appended to prev.
ASTNode* new_translation_unit(ASTNode* prev, ASTNode* ext_decl, SourceLoc loc);
ASTNode* new_decl_stmt (ASTNode* declaration, SourceLoc loc);
ASTNode* new_function_def (ASTNode* decl_spec, ASTNode* declarator,
ASTNode* decl_list /*nullable*/, ASTNode* compound_stmt,
SourceLoc loc);
ASTNode* new_declaration_list(ASTNode* decl, SourceLoc loc);
ASTNode* append_declaration_list(ASTNode* list, ASTNode* decl, SourceLoc loc);
// Belongs with the initializer constructors: wraps (designator_list = initializer)
// in a DESIGNATED_INIT node and appends it to the initializer item list.
ASTNode* append_designated_init(ASTNode* list,
                                ASTNode* designator_list,
                                ASTNode* initializer, SourceLoc loc);
// Overloads so the list builders accept an already-built ASTNode* as well as
// an enum value. (A duplicated append_spec_list declaration and a broken
// commented-out prototype were removed here.)
ASTNode* new_spec_list(ASTNode* spec_node, SourceLoc loc);
ASTNode* append_spec_list(ASTNode* list, ASTNode* spec_node, SourceLoc loc);
ASTNode* new_specq_list(ASTNode* node, SourceLoc loc);
ASTNode* append_specq_list(ASTNode* list, ASTNode* node, SourceLoc loc);
/*
 * Returns the identifier declared by a declarator subtree, or an empty string
 * when `dtor` is null or no name can be found.
 *
 * The walk follows the shapes produced by the declarator constructors:
 *   - ID, or DECLARATOR with non-empty text : this node carries the name.
 *   - DECLARATOR without text               : children are {pointer?, direct_decl},
 *     so the direct declarator is always the LAST child. Descending into the
 *     first child (as before) landed on the POINTER node and lost the name of
 *     pointer declarators such as `int *p`.
 *   - FUNC_DECL / OLD_FUNC_DECL / ARRAY_DECL: the inner declarator is the first child.
 * Any other node kind ends the search.
 *
 * Declared `inline` rather than `static`: this is a header-defined function,
 * and `static` would give every including translation unit its own copy.
 */
inline std::string extractNameFromDeclarator(ASTNode* dtor) {
    ASTNode* current = dtor;
    while (current != nullptr) {
        switch (current->tag) {
        case ASTTag::ID:
            return current->text;

        case ASTTag::DECLARATOR:
            if (!current->text.empty()) {
                return current->text;
            }
            if (current->kids.empty()) {
                return std::string();
            }
            current = current->kids.back();   // skip the optional leading pointer
            break;

        case ASTTag::FUNC_DECL:
        case ASTTag::OLD_FUNC_DECL:
        case ASTTag::ARRAY_DECL:
            if (current->kids.empty()) {
                return std::string();
            }
            current = current->kids.front();  // the declarator being wrapped
            break;

        default:
            return std::string();             // cannot descend any further
        }
    }
    return std::string();
}
#endif /* MINI_C_AST_H */

364
interpreter.cpp Normal file
View File

@@ -0,0 +1,364 @@
#include "interpreter.h"
#include <cctype>
#include <cstddef>
#include <exception>
#include <sstream>
/*
 * Parses an operand as a decimal integer literal.
 *
 * Returns the value when `s` starts with a digit, or with '-' followed by a
 * digit; returns 0 for an empty string or anything else (e.g. a variable
 * name). std::stoi may still throw std::out_of_range for literals that do not
 * fit in an int.
 *
 * Characters are converted to unsigned char before calling std::isdigit:
 * passing a negative char (any non-ASCII byte) is undefined behaviour.
 */
int IRInterpreter::asInt(const std::string& s) {
    if (s.empty()) return 0;

    const auto isDigit = [](char c) {
        return std::isdigit(static_cast<unsigned char>(c)) != 0;
    };
    const bool isLiteral =
        isDigit(s[0]) || (s[0] == '-' && s.size() > 1 && isDigit(s[1]));
    return isLiteral ? std::stoi(s) : 0;
}
/*
 * Resolves an operand to a string:
 *   - a double-quoted literal yields its contents without the quotes;
 *   - otherwise, a variable in memory_ that currently holds a string yields that string;
 *   - otherwise the operand text itself is returned unchanged.
 */
std::string IRInterpreter::asStr(const std::string& s) {
    const bool quoted = s.size() >= 2 && s.front() == '"' && s.back() == '"';
    if (quoted) {
        return s.substr(1, s.size() - 2);
    }

    const auto slot = memory_.find(s);
    if (slot != memory_.end() && std::holds_alternative<std::string>(slot->second)) {
        return std::get<std::string>(slot->second);
    }
    return s;
}
void IRInterpreter::runStepByStep(const std::vector<Quad>& quads) {
// 建立 label -> index 映射 (用于控制流跳转)
std::unordered_map<std::string, int> labelToIndex;
for (int i = 0; i < quads.size(); ++i) {
if (quads[i].op == "label") {
labelToIndex[quads[i].result] = i;
}
}
std::string cmd;
while (pc_ < quads.size()) {
const auto& q = quads[pc_];
if (breakpoints_.count(q.loc.line)) {
std::cout << "🟥 Breakpoint hit at line " << q.loc.line << "\n";
dumpVariables(q.loc.line);
// ⚠️关键修复:命中断点时立刻执行当前指令!
execute(q, quads, labelToIndex);
pc_++;
// 如果下一条指令属于同一行,也立刻执行,避免二次命中断点
while (pc_ < quads.size() && quads[pc_].loc.line == q.loc.line) {
execute(quads[pc_], quads, labelToIndex);
pc_++;
}
continue;
}
// 显示当前指令
std::cout << "[PC " << pc_ << "] " << q.op << " " << q.arg1 << " " << q.arg2 << " -> " << q.result;
std::cout << " @ line " << q.loc.line << "\n";
std::cout << "(step> ";
std::getline(std::cin, cmd);
if (cmd == "next" || cmd == "n") {
execute(q, quads, labelToIndex); pc_++;
}
else if (cmd.starts_with("break ")) {
int line = std::stoi(cmd.substr(6));
breakpoints_.insert(line);
std::cout << "✅ Breakpoint set at line " << line << "\n";
}
else if (cmd == "continue" || cmd == "c") {
// 先执行当前指令
execute(q, quads, labelToIndex); pc_++;
while (pc_ < quads.size()) {
const auto& current = quads[pc_];
if (breakpoints_.count(current.loc.line)) {
std::cout << "🟥 Breakpoint hit at line " << current.loc.line << "\n";
dumpVariables(current.loc.line);
// ⚠️关键修复:命中断点立刻执行当前指令!
execute(current, quads, labelToIndex); pc_++;
// 若下一条同一行,也执行
while (pc_ < quads.size() && quads[pc_].loc.line == current.loc.line) {
execute(quads[pc_], quads, labelToIndex);
pc_++;
}
break;
}
execute(current,quads, labelToIndex);
// 处理控制流
if (current.op == "goto") {
pc_ = labelToIndex[current.result];
} else if (current.op == "ifFalse") {
int cond = memory_.count(current.arg1) ? std::get<int>(memory_[current.arg1]) : asInt(current.arg1);
pc_ = cond ? pc_ + 1 : labelToIndex[current.result];
} else if (current.op == "ifTrue") {
int cond = memory_.count(current.arg1) ? std::get<int>(memory_[current.arg1]) : asInt(current.arg1);
pc_ = cond ? labelToIndex[current.result] : pc_ + 1;
} else {
pc_++;
}
}
}
else if (cmd == "print") {
dumpVariables(q.loc.line);
}
else if (cmd == "exit" || cmd == "q") {
std::cout << "👋 Exiting...\n";
break;
}
else {
std::cout << "❓ Unknown command: " << cmd << "\n";
}
}
}
/*
 * Evaluates a binary operator on two values.
 *
 * Only int/int operands are supported; any other combination, and any
 * operator not listed below, yields 0. Division by zero also yields 0.
 */
IRInterpreter::Value IRInterpreter::evalBinary(const std::string& op, const Value& a, const Value& b) {
    const int* lhs = std::get_if<int>(&a);
    const int* rhs = std::get_if<int>(&b);
    if (lhs == nullptr || rhs == nullptr) {
        return 0;
    }

    const int x = *lhs;
    const int y = *rhs;

    // Arithmetic
    if (op == "+") return x + y;
    if (op == "-") return x - y;
    if (op == "*") return x * y;
    if (op == "/") {
        if (y != 0) return x / y;
        return 0;
    }

    // Comparison
    if (op == "<") return x < y;
    if (op == ">") return x > y;
    if (op == "==") return x == y;
    if (op == "!=") return x != y;

    // Logical
    if (op == "&&") return x && y;
    if (op == "||") return x || y;

    return 0;
}
std::string IRInterpreter::findLastSource(const std::vector<Quad>& quads, const std::string& target) {
for (auto it = quads.rbegin(); it != quads.rend(); ++it) {
if (it->result == target && !it->arg1.empty()) {
return it->arg1;
}
}
return "";
}
void IRInterpreter::execute(const Quad& q,
const std::vector<Quad>& quads,
const std::unordered_map<std::string, int>& labelToIndex) {
// 类型记录
if (!q.result.empty() && q.type) {
types_[q.result] = q.type;
}
if (q.op == "=") {
if (q.arg1.empty()) {
memory_.emplace(q.result, 0);
} else if (q.arg1[0] == '"') {
memory_[q.result] = asStr(q.arg1);
} else if (isdigit(q.arg1[0]) || (q.arg1[0] == '-' && isdigit(q.arg1[1]))) {
memory_[q.result] = std::stoi(q.arg1);
} else {
memory_[q.result] = memory_.count(q.arg1) ? memory_[q.arg1] : 0;
}
}
else if (q.op == "+" || q.op == "-" || q.op == "*" || q.op == "/" ||
q.op == "<" || q.op == ">" || q.op == "==" || q.op == "!=" ||
q.op == "&&" || q.op == "||") {
Value valA = memory_.count(q.arg1) ? memory_[q.arg1] : asInt(q.arg1);
Value valB = memory_.count(q.arg2) ? memory_[q.arg2] : asInt(q.arg2);
memory_[q.result] = evalBinary(q.op, valA, valB);
}
else if (q.op == "!") {
int val = asInt(q.arg1);
memory_[q.result] = !val;
}
else if (q.op == "label" || q.op == "goto" || q.op == "ifTrue" || q.op == "ifFalse") {
// 控制流不处理,外部 runStepByStep() 会处理 PC 跳转
}
// else if (q.op == "param" || q.op == "call" || q.op == "return") {
// std::cout << "[WARN] Function-related op '" << q.op << "' not yet supported.\n";
// }
else if (q.op == "param") {
Value val = memory_.count(q.arg1) ? memory_[q.arg1] : asInt(q.arg1);
paramStack_.push_back(val);
}
else if (q.op == "call") {
CallFrame frame;
frame.return_pc = pc_ + 1;
frame.locals = memory_; // 保存当前变量环境
callStack_.push(frame);
memory_.clear();
int num_args = std::stoi(q.arg2);
auto it = labelToIndex.find(q.arg1);
if (it == labelToIndex.end()) {
std::cerr << "[ERROR] Undefined function: " << q.arg1 << "\n";
return;
}
pc_ = it->second + 1; // 跳转到函数标签下一条指令
// 传递参数到函数内部
for (int i = num_args - 1; i >= 0; --i) {
memory_["arg" + std::to_string(i)] = paramStack_.back();
paramStack_.pop_back();
}
}
else if (q.op == "return") {
Value ret_val = memory_.count(q.arg1) ? memory_[q.arg1] : asInt(q.arg1);
if (callStack_.empty()) {
std::cerr << "[ERROR] Return outside function!\n";
return;
}
CallFrame frame = callStack_.top();
callStack_.pop();
memory_ = frame.locals; // 恢复调用函数前的内存环境
// 将返回值存入指定变量中如果call指令有返回值
if (!quads[frame.return_pc - 1].result.empty()) {
memory_[quads[frame.return_pc - 1].result] = ret_val;
}
pc_ = frame.return_pc; // 返回调用位置继续执行
}
else {
std::cerr << "[ERROR] Unknown op: " << q.op << "\n";
}
}
void IRInterpreter::runUntil(const std::vector<Quad>& quads, int breakLine) {
// 建立 label -> index 映射
std::unordered_map<std::string, int> labelToIndex;
for (int i = 0; i < quads.size(); ++i) {
if (quads[i].op == "label") {
labelToIndex[quads[i].result] = i;
}
}
int pc = 0;
while (pc < quads.size()) {
const auto& q = quads[pc];
std::cout << "[PC " << pc << "] line = " << q.loc.line << ", breakLine = " << breakLine << "\n";
if (q.loc.line >= breakLine) break;
std::cout << "[EXEC] " << q.op << " " << q.arg1 << " " << q.arg2 << " -> " << q.result << "\n";
if (!q.result.empty() && q.type) {
types_[q.result] = q.type;
}
if (q.op == "=") {
if (q.arg1.empty()) {
memory_.emplace(q.result, 0);
} else if (q.arg1[0] == '"') {
memory_[q.result] = asStr(q.arg1);
} else if (isdigit(q.arg1[0]) || (q.arg1[0] == '-' && isdigit(q.arg1[1]))) {
memory_[q.result] = std::stoi(q.arg1);
} else {
memory_[q.result] = memory_.count(q.arg1) ? memory_[q.arg1] : 0;
}
}
else if (q.op == "+" || q.op == "-" || q.op == "*" || q.op == "/" ||
q.op == "<" || q.op == ">" || q.op == "==" || q.op == "!=" ||
q.op == "&&" || q.op == "||") {
Value valA = memory_.count(q.arg1) ? memory_[q.arg1] : asInt(q.arg1);
Value valB = memory_.count(q.arg2) ? memory_[q.arg2] : asInt(q.arg2);
memory_[q.result] = evalBinary(q.op, valA, valB);
}
else if (q.op == "!") {
int val = asInt(q.arg1);
memory_[q.result] = !val;
}
else if (q.op == "ifFalse") {
int cond = memory_.count(q.arg1) ? std::get<int>(memory_[q.arg1]) : asInt(q.arg1);
if (!cond) {
pc = labelToIndex[q.result];
continue;
}
}
else if (q.op == "ifTrue") {
int cond = memory_.count(q.arg1) ? std::get<int>(memory_[q.arg1]) : asInt(q.arg1);
if (cond) {
pc = labelToIndex[q.result];
continue;
}
}
else if (q.op == "goto") {
std::cout << "[DEBUG] Handling goto to " << q.result << "\n";
auto it = labelToIndex.find(q.result);
if (it != labelToIndex.end()) {
pc = it->second;
continue;
} else {
std::cerr << "[ERROR] Unknown label: " << q.result << "\n";
}
}
else if (q.op == "label") {
// do nothing
}
else if (q.op == "param" || q.op == "call" || q.op == "return") {
// 留给下一阶段实现函数调用
}
pc++; // 正常执行下一条
}
dumpVariables(breakLine);
}
void IRInterpreter::dumpVariables(int breakLine) {
std::cout << "\n---- Variables before line " << breakLine << " ----\n";
for (const auto& [name, val] : memory_) {
// 忽略以 "t" 开头的临时变量(比如 t1, t2
if (!name.empty() && name[0] == 't' && isdigit(name[1])) continue;
std::cout << name << " = ";
if (std::holds_alternative<int>(val)) {
std::cout << std::get<int>(val);
} else if (std::holds_alternative<std::string>(val)) {
std::cout << "\"" << std::get<std::string>(val) << "\"";
}
if (types_.count(name)) {
std::cout << " (type = " << types_[name]->toString() << ")";
}
std::cout << "\n";
}
}

41
interpreter.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include "ir.h"
#include <iostream>
#include <stack>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <variant>
#include <vector>
// Interpreter / debugger for the quad-based IR.
// All members are public; state lives directly in the object.
class IRInterpreter {
public:
// Runs from quad 0 until a quad whose source line is >= breakLine, then
// prints the variables via dumpVariables().
void runUntil(const std::vector<Quad>& quads, int breakLine);
// Interactive stepping mode (defined in interpreter.cpp).
void runStepByStep(const std::vector<Quad>& quads);
// Executes one quad. Control-flow quads are no-ops here; "call"/"return"
// update pc_ and callStack_.
void execute(const Quad& q,
const std::vector<Quad>& quads,
const std::unordered_map<std::string, int>& labelToIndex);
// Scans quads backwards for the last one writing `target` with a non-empty
// arg1 and returns that arg1 ("" if none).
std::string findLastSource(const std::vector<Quad>& quads, const std::string& target);
// A runtime value: either an int or a string.
using Value = std::variant<int, std::string>;
// Saved caller state for one active call.
struct CallFrame {
int return_pc; // quad index to resume at after the callee returns
std::unordered_map<std::string, Value> locals; // caller's variables at call time
};
// NOTE(review): definition not shown here; presumably returns the stored
// value or 0 when `name` is unknown — confirm.
Value getOrZero(const std::string& name);
std::unordered_map<std::string, Value> memory_; // variable name -> current value
std::unordered_map<std::string, TypePtr> types_; // variable name -> type taken from the quads
int pc_ = 0; // program counter used by execute() for call/return
// NOTE(review): presumably the breakpoint positions used by runStepByStep — confirm.
std::unordered_set<int> breakpoints_;
int lastBreakpointPC = -1; // kept as class state (original note: "recorded in the class")
// Helpers defined in interpreter.cpp; used to turn an operand string into a value.
int asInt(const std::string& s);
std::string asStr(const std::string& s);
// Evaluates a binary operator on two int values; returns 0 otherwise.
Value evalBinary(const std::string& op, const Value& a, const Value& b);
// Prints all non-temporary variables to stdout.
void dumpVariables(int breakLine);
std::stack<CallFrame> callStack_; // active calls, innermost on top
std::vector<Value> paramStack_; // values pushed by "param", consumed by "call"
// NOTE(review): not referenced in the code shown here; presumably cached
// copies used by runStepByStep — confirm.
std::unordered_map<std::string, int> labelToIndex_;
std::vector<Quad> quads_;
};

612
ir.cpp Normal file
View File

@@ -0,0 +1,612 @@
// ir.cpp
#include "ir.h"
#include "ast.h"
#include <cassert>
#include <iostream>
#include "symbol.h"
/**
 * Debug helper that walks the AST depth-first.
 *
 * The actual printing is currently disabled (see the commented-out line), so
 * the traversal has no observable effect. The indentation string is built only
 * inside that disabled statement, so no per-node string is allocated.
 *
 * @param n       subtree root; null is accepted and ignored
 * @param indent  indentation (in spaces) for this level; children get +2
 */
void dumpAST(ASTNode* n, int indent = 0) {
    if (!n) return;
    // std::cout << std::string(indent, ' ') << "Tag = " << static_cast<int>(n->tag) << ", text = " << n->text << ", loc = (" << n->loc.line << "," << n->loc.col << ")\n";
    for (auto* c : n->kids) dumpAST(c, indent + 2);
}
/**
 * Entry point: generate the quad list for the AST rooted at `root`.
 *
 * Clears the quad buffer, the break/continue label stacks and the last
 * recorded location, then lowers the tree with genStmt(). Temp/label counters
 * and the tempTypes_ cache are not reset.
 *
 * @param root  AST root (a null root produces an empty list)
 * @return      a copy of the generated quads; empty if currentScope_ is null
 */
std::vector<Quad> IRGenerator::generate(ASTNode* root) {
    if (!currentScope_) {
        std::cerr << "FATAL ERROR in IRGenerator::generate: currentScope_ is null at entry!" << std::endl;
        return {};  // nothing can be generated without a scope
    }

    dumpAST(root);  // debug hook; its printing is currently disabled

    quads_.clear();
    breakLabels_.clear();
    continueLabels_.clear();
    lastLoc_ = {0,0};

    genStmt(root);

    // To trace the result, iterate quads_ here and print
    // q.op / q.arg1 / q.arg2 / q.result.
    return quads_;
}
/// Remember `loc` as the location that subsequent emit() calls will attach
/// when they are passed lastLoc_.
void IRGenerator::recordLoc(SourceLoc loc) {
    this->lastLoc_ = loc;
}
// Expression lowering.
// Emits the quads that evaluate expression `n` and returns the name holding
// its value: a fresh temporary (t1, t2, ...) or, for identifiers and
// assignments, the variable name itself. Returns "" for a null node and for
// node kinds without a dedicated case.
// The type associated with each returned name is cached in tempTypes_.
std::string IRGenerator::genExpr(ASTNode* n) {
if (!n) return "";
// Record this node's location first so the emit() calls below can use lastLoc_.
recordLoc(n->loc);
switch (n->tag) {
// Integer constant: copy the literal text into a fresh int-typed temp.
case ASTTag::CONST: {
std::string t = newTemp();
TypePtr ty = std::make_shared<BasicType>(TS_INT);
emit("=", n->text, "", t, lastLoc_, /*var=*/"", ty);
tempTypes_[t] = ty;
return t;
}
// Identifier: emits nothing; caches the symbol's type (if found) under the
// identifier's own name and returns that name.
// NOTE(review): the lookup goes through symbolTable_ while INIT_DECL in
// genStmt uses currentScope_ — confirm locals are reachable from here.
case ASTTag::ID: {
auto *sym = symbolTable_->lookup(n->text);
if (sym) tempTypes_[n->text] = sym->type;
return n->text;
}
// Unary operator: result temp gets the operand's type.
// The inner switch has no default, so `op` stays empty for any other ival.
case ASTTag::UNARY: {
std::string src = genExpr(n->kids[0]);
std::string t = newTemp();
std::string op;
switch (static_cast<UnaryOp>(n->ival)) {
case op_address: op = "addr"; break;
case op_deref: op = "deref"; break;
case op_unary_plus: op = "+"; break;
case op_neg: op = "-"; break;
case op_bitnot: op = "~"; break;
case op_not: op = "!"; break;
}
TypePtr ty = tempTypes_[src];
emit(op, src, "", t, lastLoc_, "", ty);
tempTypes_[t] = ty;
return t;
}
// Binary operator: operator codes below 256 are used as a single character;
// larger codes are mapped to their two-character spelling, "?" if unmapped.
// The result temp is given the left operand's type.
case ASTTag::BINARY: {
std::string lhs = genExpr(n->kids[0]);
std::string rhs = genExpr(n->kids[1]);
std::string t = newTemp();
int opv = n->ival;
std::string op;
if (opv < 256) {
op = std::string(1, static_cast<char>(opv));
} else {
switch (static_cast<BinaryOp>(opv)) {
case SHL: op = "<<"; break;
case SHR: op = ">>"; break;
case LE: op = "<="; break;
case GE: op = ">="; break;
case EQ: op = "=="; break;
case NE: op = "!="; break;
default: op = "?"; break;
}
}
TypePtr ty = tempTypes_[lhs];
emit(op, lhs, rhs, t, lastLoc_, "", ty);
tempTypes_[t] = ty;
return t;
}
// Short-circuit AND: t starts at 1; either operand being false jumps to
// L_false where t is set to 0.
case ASTTag::LOGIC_AND: {
std::string L_false = newLabel();
std::string L_end = newLabel();
std::string t = newTemp();
TypePtr ty = std::make_shared<BasicType>(TS_INT);
emit("=", "1", "", t, lastLoc_, "", ty);
std::string a = genExpr(n->kids[0]);
emit("ifFalse", a, "", L_false, lastLoc_);
std::string b = genExpr(n->kids[1]);
emit("ifFalse", b, "", L_false, lastLoc_);
emit("goto", "", "", L_end, lastLoc_);
emit("label", "", "", L_false, lastLoc_);
emit("=", "0", "", t, lastLoc_, "", ty);
emit("label", "", "", L_end, lastLoc_);
tempTypes_[t] = ty;
return t;
}
// Short-circuit OR: t starts at 0; either operand being true jumps to
// L_true where t is set to 1.
case ASTTag::LOGIC_OR: {
std::string L_true = newLabel();
std::string L_end = newLabel();
std::string t = newTemp();
TypePtr ty = std::make_shared<BasicType>(TS_INT);
emit("=", "0", "", t, lastLoc_, "", ty);
std::string a = genExpr(n->kids[0]);
emit("ifTrue", a, "", L_true, lastLoc_);
std::string b = genExpr(n->kids[1]);
emit("ifTrue", b, "", L_true, lastLoc_);
emit("goto", "", "", L_end, lastLoc_);
emit("label", "", "", L_true, lastLoc_);
emit("=", "1", "", t, lastLoc_, "", ty);
emit("label", "", "", L_end, lastLoc_);
tempTypes_[t] = ty;
return t;
}
// Conditional (c ? r1 : r2): both arms assign into the same temp t.
case ASTTag::COND: {
std::string L_false = newLabel();
std::string L_end = newLabel();
std::string t = newTemp();
std::string c = genExpr(n->kids[0]);
emit("ifFalse", c, "", L_false, lastLoc_);
std::string r1 = genExpr(n->kids[1]);
emit("=", r1, "", t, lastLoc_, "", tempTypes_[r1]);
emit("goto", "", "", L_end, lastLoc_);
emit("label", "", "", L_false, lastLoc_);
std::string r2 = genExpr(n->kids[2]);
emit("=", r2, "", t, lastLoc_, "", tempTypes_[r2]);
emit("label", "", "", L_end, lastLoc_);
tempTypes_[t] = tempTypes_[r1]; // takes the first arm's type; a merged type could be inferred instead
return t;
}
// Assignment: the right-hand side is lowered before the left-hand side.
// NOTE(review): the target is whatever genExpr returns for kids[0]; for
// ARRAY_REF / STRUCT_REF / UNARY that is a fresh temp, so the store would
// go to the temp rather than the referenced location — confirm intent.
case ASTTag::ASSIGN: {
std::string rhs = genExpr(n->kids[1]);
std::string lhs = genExpr(n->kids[0]);
auto *sym = symbolTable_->lookup(lhs);
TypePtr vtype = sym ? sym->type : tempTypes_[rhs];
emit("=", rhs, "", lhs, lastLoc_, lhs, vtype);
return lhs;
}
// Array subscript: emits "[]" and reuses the base's type for the result.
case ASTTag::ARRAY_REF: {
std::string base = genExpr(n->kids[0]);
std::string idx = genExpr(n->kids[1]);
std::string t = newTemp();
emit("[]", base, idx, t, lastLoc_, "", tempTypes_[base]);
tempTypes_[t] = tempTypes_[base];
return t;
}
// Member access: "->" when n->flag is set, "." otherwise; the field name is
// passed as arg2 and the base's type is reused for the result.
case ASTTag::STRUCT_REF: {
std::string base = genExpr(n->kids[0]);
std::string fld = n->text;
std::string t = newTemp();
emit(n->flag ? "->" : ".", base, fld, t, lastLoc_, "", tempTypes_[base]);
tempTypes_[t] = tempTypes_[base];
return t;
}
// Function call: one "param" quad per argument (in source order), then a
// "call" quad carrying the callee name and argument count. The call quad
// has no type and the result temp is not entered into tempTypes_.
case ASTTag::FUNC_CALL: {
if (n->kids.size() == 2) {
for (auto* a : n->kids[1]->kids) {
std::string tmp = genExpr(a);
emit("param", tmp, "", "", lastLoc_, "", tempTypes_[tmp]);
}
}
std::string ret = newTemp();
int argc = (n->kids.size() == 2 ? n->kids[1]->kids.size() : 0);
emit("call", n->kids[0]->text, std::to_string(argc), ret, lastLoc_, "", nullptr);
return ret;
}
// String literal: copied into a temp typed as pointer-to-char.
case ASTTag::STRING: {
std::string t = newTemp();
TypePtr ty = std::make_shared<PointerType>(std::make_shared<BasicType>(TS_CHAR));
emit("=", n->text, "", t, lastLoc_, "", ty);
tempTypes_[t] = ty;
return t;
}
// Comma expression: evaluate the first operand for its effects, yield the second.
case ASTTag::EXPR_LIST: {
genExpr(n->kids[0]);
return genExpr(n->kids[1]);
}
case ASTTag::INIT_EXPR: {
if (!n->kids.empty()) {
return genExpr(n->kids[0]); // unwrap the INIT_EXPR wrapper and lower the actual expression
}
return "";
}
// Any other node kind: lower the children for their effects, no value.
default:
for (auto* c : n->kids) genExpr(c);
return "";
}
}
/**
 * Statement lowering.
 *
 * Emits the quads for statement/declaration node `n`, recursing into child
 * statements and calling genExpr() for expressions. COMPOUND_STMT,
 * FOR_DECL_STMT and FUNCTION_DEF push a child scope for the duration of the
 * call; it is popped again at the end of this function.
 *
 * Loop lowering keeps two label stacks: breakLabels_ (target of `break`) and
 * continueLabels_ (target of `continue`). For every loop kind the continue
 * target is the point from which the loop's next-iteration work starts:
 *   - while:    the condition check
 *   - do-while: the condition check after the body
 *   - for:      the iteration expression
 *
 * NOTE(review): SWITCH_STMT / CASE_STMT only lower their sub-expressions and
 * bodies; no dispatch is generated, and a `break` inside a switch uses the
 * enclosing loop's break label (asserting if there is none).
 *
 * @param n  statement node; null is accepted and ignored
 */
void IRGenerator::genStmt(ASTNode* n) {
    if (!n) return;
    recordLoc(n->loc);

    // Scope on entry, and whether this call pushed a child scope onto it.
    auto originalScope = currentScope_;
    bool scopePushed = false;

    // Push a child scope of originalScope. On failure an error is reported,
    // currentScope_ is left at (or restored to) originalScope so the rest of
    // the tree can still be processed, and false is returned.
    auto enterChildScope = [&]() -> bool {
        if (!originalScope) {
            std::cerr << "IR Gen Error: Attempting to push scope from a null originalScope at line " << n->loc.line << std::endl;
            return false;
        }
        currentScope_ = originalScope->push();
        if (!currentScope_) {
            std::cerr << "IR Gen Error: Pushing scope resulted in nullptr at line " << n->loc.line << std::endl;
            currentScope_ = originalScope;
            return false;
        }
        scopePushed = true;
        return true;
    };

    switch (n->tag) {
    case ASTTag::INIT_DECL: {
        ASTNode* dtor = n->kids[0];
        std::string varName = extractNameFromDeclarator(dtor);
        SourceLoc varLoc = dtor ? dtor->loc : n->loc;
        if (varName.empty()) {
            std::cerr << "IR Gen Error: Could not extract variable name in INIT_DECL at line " << n->loc.line << std::endl;
            break;
        }
        // Only a declaration with an initializer produces IR.
        if (n->kids.size() == 2 && n->kids[1]) {
            std::string rhs = genExpr(n->kids[1]);

            Symbol* sym = nullptr;
            std::cerr << "[DEBUG IRGen] Looking up '" << varName << "' in scope: " << currentScope_.get() << std::endl;
            if (currentScope_) {
                sym = currentScope_->lookup(varName);
            }
            TypePtr vtype = sym ? sym->type : nullptr;

            if (sym) {
                std::cerr << "[DEBUG IRGen] Symbol '" << varName << "' FOUND in scope " << currentScope_.get() << ".";
                if (vtype) {
                    std::cerr << " Type pointer is VALID: " << vtype.get() << " (" << vtype->toString() << ")" << std::endl;
                } else {
                    std::cerr << " Type pointer is NULL!" << std::endl;
                }
            } else {
                std::cerr << "[DEBUG IRGen] Symbol '" << varName << "' NOT FOUND in scope " << currentScope_.get() << "." << std::endl;
            }

            if (!vtype) {
                std::cerr << "IR Gen Warning: Could not find symbol type for '" << varName << "' during INIT_DECL at line " << varLoc.line << ". Type info might be missing." << std::endl;
                // Fall back to the initializer's type when it is known;
                // otherwise the quad is emitted without a type.
                if (tempTypes_.count(rhs)) vtype = tempTypes_[rhs];
            }
            // The quad carries the location of the declared variable.
            emit("=", rhs, "", varName, varLoc, varName, vtype);
        }
        break;
    }
    case ASTTag::EXPR_STMT:
        if (!n->kids.empty()) genExpr(n->kids[0]);
        break;
    case ASTTag::COMPOUND_STMT: {
        // A block opens its own scope; it is popped at the end of this function.
        if (!enterChildScope()) break;
        for (auto* item : n->kids) {
            genStmt(item);
        }
        break;
    }
    case ASTTag::BLOCK_ITEM_LIST:
        for (auto* c : n->kids) genStmt(c);
        break;
    case ASTTag::BLOCK_STMT:
        genStmt(n->kids[0]);
        break;
    case ASTTag::IF_STMT: {
        std::string L_false = newLabel();
        std::string L_end = newLabel();
        std::string cond = genExpr(n->kids[0]);
        emit("ifFalse", cond, "", L_false, lastLoc_);
        genStmt(n->kids[1]);
        if (n->kids.size() == 3) {
            // if/else: the then-branch must jump over the else-branch.
            emit("goto", "", "", L_end, lastLoc_);
            emit("label", "", "", L_false, lastLoc_);
            genStmt(n->kids[2]);
            emit("label", "", "", L_end, lastLoc_);
        } else {
            emit("label", "", "", L_false, lastLoc_);
        }
        break;
    }
    case ASTTag::WHILE_STMT: {
        std::string L_top = newLabel();
        std::string L_false = newLabel();
        emit("label", "", "", L_top, lastLoc_);
        std::string cond = genExpr(n->kids[0]);
        emit("ifFalse", cond, "", L_false, lastLoc_);
        breakLabels_.push_back(L_false);
        continueLabels_.push_back(L_top);  // continue re-tests the condition
        genStmt(n->kids[1]);
        emit("goto", "", "", L_top, lastLoc_);
        emit("label", "", "", L_false, lastLoc_);
        breakLabels_.pop_back();
        continueLabels_.pop_back();
        break;
    }
    case ASTTag::DO_WHILE_STMT: {
        std::string L_top = newLabel();
        std::string L_cont = newLabel();
        std::string L_false = newLabel();
        emit("label", "", "", L_top, lastLoc_);
        breakLabels_.push_back(L_false);
        // continue must still evaluate the loop condition, so it targets the
        // label placed just before the condition, not the top of the body.
        continueLabels_.push_back(L_cont);
        genStmt(n->kids[0]);
        emit("label", "", "", L_cont, lastLoc_);
        std::string cond = genExpr(n->kids[1]);
        emit("ifTrue", cond, "", L_top, lastLoc_);
        emit("label", "", "", L_false, lastLoc_);
        breakLabels_.pop_back();
        continueLabels_.pop_back();
        break;
    }
    case ASTTag::FOR_STMT: {
        genStmt(n->kids[0]);  // init
        std::string L_top = newLabel();
        std::string L_cont = newLabel();
        std::string L_false = newLabel();
        emit("label", "", "", L_top, lastLoc_);
        // An empty condition means "loop forever" (until a break).
        if (n->kids[1] && !n->kids[1]->kids.empty()) {
            std::string cond = genExpr(n->kids[1]->kids[0]);
            emit("ifFalse", cond, "", L_false, lastLoc_);
        }
        breakLabels_.push_back(L_false);
        // continue must run the iteration expression before re-testing the
        // condition, exactly as FOR_DECL_STMT does.
        continueLabels_.push_back(L_cont);
        genStmt(n->kids[3]);  // body
        emit("label", "", "", L_cont, lastLoc_);
        if (n->kids[2]) genExpr(n->kids[2]);  // iteration expression
        emit("goto", "", "", L_top, lastLoc_);
        emit("label", "", "", L_false, lastLoc_);
        breakLabels_.pop_back();
        continueLabels_.pop_back();
        break;
    }
    case ASTTag::BREAK_STMT:
        assert(!breakLabels_.empty());
        emit("goto", "", "", breakLabels_.back(), lastLoc_);
        break;
    case ASTTag::CONTINUE_STMT:
        assert(!continueLabels_.empty());
        emit("goto", "", "", continueLabels_.back(), lastLoc_);
        break;
    case ASTTag::GOTO_STMT:
        emit("goto", "", "", n->text, lastLoc_);
        break;
    case ASTTag::LABELED_ID_STMT:
        emit("label", "", "", n->text, lastLoc_);
        genStmt(n->kids[0]);
        break;
    case ASTTag::SWITCH_STMT:
        genExpr(n->kids[0]);
        for (auto* c : n->kids[1]->kids)
            genStmt(c);
        break;
    case ASTTag::CASE_STMT:
        genExpr(n->kids[0]);
        genStmt(n->kids[1]);
        break;
    case ASTTag::DEFAULT_STMT:
        genStmt(n->kids[0]);
        break;
    case ASTTag::RETURN_STMT: {
        std::string v = n->kids.empty() ? "" : genExpr(n->kids[0]);
        // The two-argument emit overload attaches lastLoc_ itself.
        emit("return", v);
        break;
    }
    case ASTTag::FOR_DECL_STMT: {
        // The loop variable lives in its own scope covering the whole loop.
        if (!enterChildScope()) break;

        genStmt(n->kids[0]);  // init declaration, inside the new scope

        std::string L_top = newLabel();
        std::string L_cont = newLabel();   // continue target
        std::string L_false = newLabel();  // break target
        emit("label", "", "", L_top, lastLoc_);

        // An absent condition means "loop forever" (until a break).
        if (n->kids[1] && n->kids[1]->tag == ASTTag::EXPR_STMT && !n->kids[1]->kids.empty()) {
            std::string cond = genExpr(n->kids[1]->kids[0]);
            emit("ifFalse", cond, "", L_false, lastLoc_);
        }

        breakLabels_.push_back(L_false);
        continueLabels_.push_back(L_cont);

        if (n->kids.size() > 3 && n->kids[3]) genStmt(n->kids[3]);  // body
        emit("label", "", "", L_cont, lastLoc_);
        if (n->kids.size() > 2 && n->kids[2]) {
            genExpr(n->kids[2]);  // iteration expression
        }
        emit("goto", "", "", L_top, lastLoc_);
        emit("label", "", "", L_false, lastLoc_);

        breakLabels_.pop_back();
        continueLabels_.pop_back();
        break;
    }
    case ASTTag::FUNCTION_DEF: {
        ASTNode* dtor = n->kids[1];
        // The body is the compound statement at index 3, or else at index 2.
        ASTNode* body = nullptr;
        if (n->kids.size() > 3 && n->kids[3] && n->kids[3]->tag == ASTTag::COMPOUND_STMT) body = n->kids[3];
        else if (n->kids.size() > 2 && n->kids[2] && n->kids[2]->tag == ASTTag::COMPOUND_STMT) body = n->kids[2];

        // 1. Function name.
        std::string funcName = extractNameFromDeclarator(dtor);
        SourceLoc nameLoc = dtor ? dtor->loc : n->loc;
        if (funcName.empty()) {
            std::cerr << "IR Gen Error: Could not extract function name from FUNCTION_DEF at line " << n->loc.line << std::endl;
            funcName = "unknown_function";
        }

        // 2. Function type, looked up in the global symbol table.
        TypePtr funcType = nullptr;
        Symbol* funcSymbol = nullptr;
        if (funcName != "unknown_function" && symbolTable_) {
            funcSymbol = symbolTable_->lookup(funcName);
        }
        if (funcSymbol && funcSymbol->type && dynamic_cast<FunctionType*>(funcSymbol->type.get())) {
            funcType = funcSymbol->type;
        } else if (funcName != "unknown_function") {
            std::cerr << "IR Gen Error: Could not find symbol or valid type for function '" << funcName << "' in symbol table at line " << nameLoc.line << std::endl;
        }

        // 3. Entry label named after the function.
        emit("label", "", "", funcName, n->loc, /*var=*/"", funcType);

        // 4. Function scope. Parameters are not inserted here; the body is
        //    simply lowered inside the pushed scope.
        if (!enterChildScope()) break;

        // 5. Body.
        if (body) {
            genStmt(body);
        } else {
            std::cerr << "IR Gen Warning: Function '" << funcName << "' has no body at line " << n->loc.line << std::endl;
        }
        break;
    }
    default:
        for (auto* c : n->kids)
            genStmt(c);
        break;
    }

    // Leave the scope pushed by this call, if any.
    if (scopePushed) {
        if (currentScope_) {
            currentScope_ = currentScope_->pop();
            if (!currentScope_) {
                // Should not happen unless the outermost scope was popped.
                std::cerr << "IR Gen Error: Popped scope resulted in nullptr! (Likely popped global scope or scope management issue)" << std::endl;
            }
        } else {
            // scopePushed is set but the current scope vanished: state is corrupt.
            std::cerr << "IR Gen Error: Scope stack corrupted before pop attempt." << std::endl;
        }
    }
}

130
ir.h Normal file
View File

@@ -0,0 +1,130 @@
#ifndef MINI_C_IR_H
#define MINI_C_IR_H
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include "symbol.h"
#include "ast.h"
#include "type.h"
// One IR instruction in quadruple (three-address) form.
// The member order matters: IRGenerator::emit builds a Quad with positional
// aggregate initialisation {op, arg1, arg2, result, loc, var, type}.
struct Quad {
std::string op; // opcode, e.g. "+", "=", "ifFalse", "param", "call", "label", "goto"
std::string arg1; // first operand
std::string arg2; // second operand, if any
std::string result; // destination variable/temporary, or the label name for label/jump quads
SourceLoc loc; // source location the quad was generated for
std::string var; // name of the source variable this quad operates on, if any
TypePtr type; // type of the result or variable; may be null
};
// Lowers an AST into a flat list of Quad instructions.
// All members are public. Members are initialised in declaration order, so
// symbolTable_ and currentScope_ (declared first, below the constructor) are
// set up before the counters even though the initialiser list names the
// counters first.
class IRGenerator {
public:
// (Original note: "replace the IRGenerator constructor in ir.cpp".)
// Takes the global scope produced by semantic analysis. Logs every step to
// stderr and throws std::runtime_error if either scope pointer ends up null.
IRGenerator(std::shared_ptr<Scope> globalScope)
// The initialiser list logs the state of globalScope before using it.
: tempCount_(0), labelCount_(0), lastLoc_({0,0}),
symbolTable_(
// Immediately-invoked lambda: log globalScope's state, then move it in.
[&]() -> std::shared_ptr<Scope> {
std::cerr << "[DEBUG] IRGenerator Constructor: Initializing symbolTable_..." << std::endl;
std::cerr << "[DEBUG] Input globalScope valid? " << (globalScope ? "Yes" : "No") << std::endl;
if(globalScope) std::cerr << "[DEBUG] Input globalScope points to: " << globalScope.get() << std::endl;
return std::move(globalScope); // hand the pointer over
}() // invoke the lambda right away
),
currentScope_(
// Immediately-invoked lambda: log symbolTable_'s state, then copy it.
// symbolTable_ is already initialised here because it is declared first.
[&]() -> std::shared_ptr<Scope> {
std::cerr << "[DEBUG] IRGenerator Constructor: Initializing currentScope_..." << std::endl;
std::cerr << "[DEBUG] symbolTable_ (after move) valid? " << (symbolTable_ ? "Yes" : "No") << std::endl;
if(symbolTable_) std::cerr << "[DEBUG] symbolTable_ points to: " << symbolTable_.get() << std::endl;
return symbolTable_; // copy the pointer
}() // invoke the lambda right away
),
tmpIdx_(0), labelIdx_(0)
{
// Sanity checks performed in the constructor body.
std::cerr << "[DEBUG] IRGenerator Constructor Body Entry." << std::endl;
// Check symbolTable_.
if (!symbolTable_) {
std::cerr << "FATAL ERROR in IRGenerator Constructor Body: symbolTable_ is null!" << std::endl;
throw std::runtime_error("IRGenerator requires a valid global scope (symbolTable_ check failed).");
} else {
std::cerr << "[DEBUG] IRGenerator Constructor Body: symbolTable_ check passed (points to: " << symbolTable_.get() << ")." << std::endl;
}
// Check currentScope_ (this is where an earlier failure showed up).
if (!currentScope_) {
std::cerr << "FATAL ERROR in IRGenerator Constructor Body: currentScope_ is null!" << std::endl;
std::cerr << "[DEBUG] Value of symbolTable_.get() at this point: " << symbolTable_.get() << std::endl; // print the symbolTable_ pointer again
throw std::runtime_error("IRGenerator scope initialization failed (currentScope_ check failed).");
} else {
std::cerr << "[DEBUG] IRGenerator Constructor Body: currentScope_ check passed (points to: " << currentScope_.get() << ")." << std::endl;
}
std::cerr << "[DEBUG] IRGenerator Constructor Body Exit." << std::endl;
}
// Global scope; must stay declared before currentScope_ (see class comment).
std::shared_ptr<Scope> symbolTable_;
// Scope the generator is currently inside; starts as the global scope.
std::shared_ptr<Scope> currentScope_;
// Entry point: lowers the tree under `root` and returns the quads.
std::vector<Quad> generate(ASTNode* root);
// Stores `loc` in lastLoc_.
void recordLoc(SourceLoc loc);
int tempCount_ = 0; // number of temporaries handed out by newTemp()
int labelCount_ = 0; // number of labels handed out by newLabel()
std::vector<Quad> quads_; // output buffer
std::vector<std::string> breakLabels_; // innermost-last stack of `break` targets
std::vector<std::string> continueLabels_; // innermost-last stack of `continue` targets
SourceLoc lastLoc_; // most recently recorded location, used as the default for emit()
std::unordered_map<std::string,TypePtr> tempTypes_; // name (temporary or variable) -> type
// NOTE(review): not referenced by the code shown here; newTemp()/newLabel()
// use tempCount_/labelCount_ — these look like leftovers, confirm.
int tmpIdx_ = 0, labelIdx_ = 0;
// Returns a fresh temporary name: t1, t2, ...
std::string newTemp() {
return "t" + std::to_string(++tempCount_);
}
// Returns a fresh label name: L1, L2, ...
std::string newLabel() {
return "L" + std::to_string(++labelCount_);
}
// Full emit: appends one quad, recording variable name and type with it.
void emit(const std::string &op,
const std::string &arg1,
const std::string &arg2,
const std::string &res,
SourceLoc loc,
const std::string &var,
TypePtr type) {
quads_.push_back({op, arg1, arg2, res, loc, var, type});
}
// Convenience overloads for older call sites: var = "", type = nullptr.
// This one also uses lastLoc_ as the location.
void emit(const std::string &op,
const std::string &arg1,
const std::string &arg2,
const std::string &res) {
emit(op, arg1, arg2, res, lastLoc_, /*var=*/"", /*type=*/nullptr);
}
// Explicit location, no variable name or type.
void emit(const std::string &op,
const std::string &arg1,
const std::string &arg2,
const std::string &res,
SourceLoc loc) {
emit(op, arg1, arg2, res, loc, /*var=*/"", /*type=*/nullptr);
}
// Opcode plus a single operand; everything else is empty, location is lastLoc_.
void emit(const std::string &op,
const std::string &arg1) {
emit(op, arg1, /*arg2=*/"", /*res=*/"", lastLoc_, /*var=*/"", /*type=*/nullptr);
}
// Recursively lowers an expression; returns the temporary or variable name
// that holds its value.
std::string genExpr(ASTNode* n);
// Recursively lowers a statement.
void genStmt(ASTNode* n);
};
#endif // MINI_C_IR_H

2599
lex.yy.cpp Normal file

File diff suppressed because it is too large Load Diff

273
main.cpp Normal file
View File

@@ -0,0 +1,273 @@
// main.cpp
#include "ast.h"
#include "mini_c.tab.hpp" // Bison生成的头文件
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector> // 需要包含 vector
#include <fstream> // 需要包含 fstream 用于文件输出
#include <filesystem> // 需要包含 filesystem (C++17) 用于路径操作
#include "interpreter.h" // 可能其他模式需要
#include "ir.h" // IR 定义
#include "semantic.h" // 语义分析器
#include "AssemblyGenerator.h" // *** 包含汇编生成器头文件 ***
#include "ObjectFileGenerator.h"
#include "InstructionEncoder.h" // 你的指令编码器
#include "PrettyPrinter.h"
// Error reporting hook.
// Writes `msg` to stderr with a "Lexer/Parser error: " prefix. The flex file
// declares this function, so the generated lexer may call it as well.
void error(const char* msg)
{
    std::FILE* const sink = stderr;
    std::fprintf(sink, "Lexer/Parser error: %s\n", msg);
}
extern FILE* yyin; // Flex 使用的输入文件指针
extern ASTNode *ast_root; // Bison 解析后生成的 AST 根节点 (在 .y 文件中定义)
/*──────────────────────────────
* 简易 AST prettyprinter
*─────────────────────────────*/
// (tag_name, node_loc, print_ast 函数保持不变)
// Returns the enumerator name of an AST tag as a string literal, or "UNKNOWN"
// for a tag that has no entry below.
static const char* tag_name(ASTTag t)
{
// Expands to one `case` that returns the enumerator's own spelling.
#define TAG_NAME_CASE(tag) case ASTTag::tag: return #tag;
    switch (t) {
        // primary expressions
        TAG_NAME_CASE(ID) TAG_NAME_CASE(CONST) TAG_NAME_CASE(STRING)
        // operators
        TAG_NAME_CASE(UNARY) TAG_NAME_CASE(BINARY)
        TAG_NAME_CASE(LOGIC_AND) TAG_NAME_CASE(LOGIC_OR)
        TAG_NAME_CASE(COND) TAG_NAME_CASE(ASSIGN)
        // postfix / prefix expressions
        TAG_NAME_CASE(ARRAY_REF) TAG_NAME_CASE(STRUCT_REF) TAG_NAME_CASE(FUNC_CALL)
        TAG_NAME_CASE(PRE_INC) TAG_NAME_CASE(PRE_DEC)
        TAG_NAME_CASE(POST_INC) TAG_NAME_CASE(POST_DEC)
        TAG_NAME_CASE(SIZEOF_EXPR) TAG_NAME_CASE(CAST_EXPR) TAG_NAME_CASE(COMPOUND_LITERAL)
        TAG_NAME_CASE(EXPR_LIST) TAG_NAME_CASE(ARG_LIST)
        // declarations
        TAG_NAME_CASE(SPECIFIER) TAG_NAME_CASE(SPEC_LIST) TAG_NAME_CASE(DECLARATION)
        TAG_NAME_CASE(INIT_DECL) TAG_NAME_CASE(INIT_DECL_LIST)
        TAG_NAME_CASE(DECLARATOR) TAG_NAME_CASE(ARRAY_DECL) TAG_NAME_CASE(FUNC_DECL)
        TAG_NAME_CASE(OLD_FUNC_DECL) TAG_NAME_CASE(POINTER)
        TAG_NAME_CASE(PARAM_DECL) TAG_NAME_CASE(PARAM_LIST)
        TAG_NAME_CASE(ENUM_SPEC) TAG_NAME_CASE(SU_SPEC)
        TAG_NAME_CASE(STRUCT_DECL) TAG_NAME_CASE(ENUM_CONST)
        TAG_NAME_CASE(TYPE_NAME_NODE)
        TAG_NAME_CASE(ABS_DECL) TAG_NAME_CASE(ABS_ARRAY) TAG_NAME_CASE(ABS_FUNC)
        // initializers
        TAG_NAME_CASE(INIT_EXPR) TAG_NAME_CASE(INIT_LIST_NODE) TAG_NAME_CASE(INIT_ITEM_LIST)
        TAG_NAME_CASE(DESIGNATED_INIT) TAG_NAME_CASE(ARRAY_DESIGNATOR) TAG_NAME_CASE(FIELD_DESIGNATOR)
        // statements
        TAG_NAME_CASE(LABELED_ID_STMT) TAG_NAME_CASE(CASE_STMT) TAG_NAME_CASE(DEFAULT_STMT)
        TAG_NAME_CASE(COMPOUND_STMT) TAG_NAME_CASE(EXPR_STMT)
        TAG_NAME_CASE(IF_STMT) TAG_NAME_CASE(SWITCH_STMT)
        TAG_NAME_CASE(WHILE_STMT) TAG_NAME_CASE(DO_WHILE_STMT)
        TAG_NAME_CASE(FOR_STMT) TAG_NAME_CASE(FOR_DECL_STMT)
        TAG_NAME_CASE(GOTO_STMT) TAG_NAME_CASE(CONTINUE_STMT)
        TAG_NAME_CASE(BREAK_STMT) TAG_NAME_CASE(RETURN_STMT)
        TAG_NAME_CASE(BLOCK_ITEM_LIST) TAG_NAME_CASE(BLOCK_DECL) TAG_NAME_CASE(BLOCK_STMT)
        // top level
        TAG_NAME_CASE(TRANSL_UNIT) TAG_NAME_CASE(DECL_STMT) TAG_NAME_CASE(FUNCTION_DEF)
        // any tag not listed above
        default: return "UNKNOWN";
    }
#undef TAG_NAME_CASE
}
// Best-effort source location for a node: walks down the chain of first
// children until a node with a non-zero line or column is found. If the chain
// ends first (no children, or a null first child), the last node's own
// location is returned. A null `n` yields {0, 0}.
static SourceLoc node_loc(const ASTNode* n) {
    for (const ASTNode* cur = n; cur != nullptr; cur = cur->kids[0]) {
        const bool hasLocation = cur->loc.line || cur->loc.col;
        const bool canDescend = !cur->kids.empty() && cur->kids[0];
        if (hasLocation || !canDescend) {
            return cur->loc;
        }
    }
    return {0, 0};
}
// Pretty-prints the subtree rooted at `n` to stdout, one node per line:
//   <indent>[line:col] TAG "text" (ival) [flag] {type: ...}
// The text, ival, flag and type parts appear only when set. Children are
// printed two columns deeper. A null node prints nothing.
static void print_ast(const ASTNode* n, int indent = 0)
{
    if (n == nullptr) {
        return;
    }

    std::ostream& out = std::cout;
    const auto where = node_loc(n);
    const std::string padding(indent, ' ');

    out << padding;
    out << "[" << where.line << ":" << where.col << "] ";
    out << tag_name(n->tag);

    if (!n->text.empty()) {
        out << " \"" << n->text << "\"";
    }
    if (n->ival) {
        out << " (" << n->ival << ")";
    }
    if (n->flag) {
        out << " [flag]";
    }
    // Type annotation, when one is attached to the node.
    if (n->type) {
        out << " {type: " << n->type->toString() << "}";
    }
    out << '\n';

    const int childIndent = indent + 2;
    for (auto kid = n->kids.begin(); kid != n->kids.end(); ++kid) {
        print_ast(*kid, childIndent);
    }
}
/*──────────────────────────────
* main
*─────────────────────────────*/
int main(int argc, char* argv[])
{
if (argc < 3) {
std::cerr << "Usage: " << argv[0] << " <operation> <file>\n";
std::cerr << "Operations: static_compile, view_ast, view_ir, gen_asm\n";
return 1;
}
std::string operation = argv[1];
std::string inputFilename = argv[2];
yyin = std::fopen(inputFilename.c_str(), "r");
if (!yyin) {
std::perror(inputFilename.c_str());
return 2;
}
if (yyparse() == 0 && ast_root != nullptr) {
// 1. 语义分析
SemanticAnalyzer sema;
sema.analyze(ast_root); // 调用 analyze (返回 void)
// *** 修正错误检查 ***
// 通过检查 diagnostics() 的结果来判断是否有错误
bool hasSemanticErrors = !sema.diagnostics().empty();
if (hasSemanticErrors) {
std::cerr << "[!] Semantic Errors Detected:\n";
for (const auto &d : sema.diagnostics()) { // 直接使用 diagnostics() 获取错误
std::cerr << " [" << d.loc.line << ":" << d.loc.col << "] Error: "
<< d.message << "\n";
}
std::cerr << "Compilation aborted due to semantic errors.\n";
fclose(yyin);
return 3; // 返回错误码
}
// else { // 如果没有错误,可以继续执行
// std::cout << "Semantic analysis successful.\n";
// }
// 2. 根据操作执行不同逻辑 (后续代码保持不变)
if (operation == "sc") {
std::cout << "Static compilation checks passed (Semantic Analysis).\n";
} else if (operation == "ast") {
std::cout << "---- Abstract Syntax Tree (AST) ----\n";
print_ast(ast_root);
} else if (operation == "ir") {
// ... (view_ir 代码保持不变) ...
auto globalScope = sema.getGlobalScope();
if (!globalScope) { // *** 添加检查 ***
std::cerr << "FATAL ERROR in main: SemanticAnalyzer::getGlobalScope() returned a null pointer!" << std::endl;
fclose(yyin);
return 6; // 或者其他错误码
}
IRGenerator irgen(globalScope);
auto quads = irgen.generate(ast_root);
std::cout << "\n---- Three-address code (IR) ----\n";
for (const auto &q : quads) {
printf("%-8s %-8s %-8s -> %-8s",
q.op.c_str(),
q.arg1.c_str(),
q.arg2.c_str(),
q.result.c_str());
printf(" @ (%d,%d)", q.loc.line, q.loc.col);
if (!q.var.empty()) printf(" var=%s", q.var.c_str());
if (q.type) printf(" type=%s", q.type->toString().c_str());
printf("\n");
}
}
else if (operation == "debug") {
auto globalScope = sema.getGlobalScope();
IRGenerator irgen(globalScope);
auto quads = irgen.generate(ast_root);
IRInterpreter interp;
interp.runStepByStep(quads);
}
else if (operation == "asm") {
// 确保拿到全局符号表
auto globalScope = sema.getGlobalScope();
if (!globalScope) {
std::cerr << "FATAL ERROR in main: SemanticAnalyzer::getGlobalScope() returned a null pointer!" << std::endl;
fclose(yyin);
return 6;
}
// 生成 IR
IRGenerator irgen(globalScope);
auto quads = irgen.generate(ast_root);
// 生成 ARM64 汇编
AssemblyGenerator asmGen(quads);
std::string asmCode = asmGen.generate();
// 将汇编代码写入文件
std::ofstream outFile("1.s"); // 你可以自定义文件名
if (!outFile) {
std::cerr << "Error: Failed to open output file for writing!" << std::endl;
fclose(yyin);
return 7;
}
outFile << asmCode;
outFile.close();
std::cout << "Assembly code successfully written to '1.s'.\n";
}
else if (operation == "obj") {
const char* assemblyFile = "1.s";
const char* objectFile = "1.o";
// 调用系统命令:使用 `as` 汇编器来将 `1.s` 编译成 `1.o`
std::string command = "as " + std::string(assemblyFile) + " -o " + std::string(objectFile);
// 输出命令并执行
std::cout << "Running command: " << command << std::endl;
// 执行命令
int result = system(command.c_str());
// 检查命令是否执行成功
if (result == 0) {
std::cout << "Compilation successful, " << objectFile << " created." << std::endl;
} else {
std::cerr << "Error during compilation." << std::endl;
}
}
else if (operation == "exec") {
const char* objectFile = "1.o";
const char* executableFile = "1";
// 调用系统命令:使用 `g++` 链接器将 `1.o` 链接成可执行文件 `1`
std::string command = "g++ " + std::string(objectFile) + " -o " + std::string(executableFile);
// 输出命令并执行
std::cout << "Running command: " << command << std::endl;
// 执行命令
int result = system(command.c_str());
// 检查命令是否执行成功
if (result == 0) {
std::cout << "Executable created successfully: " << executableFile << std::endl;
} else {
std::cerr << "Error during linking." << std::endl;
}
}
else if (operation == "pp") {
PrettyPrinter pp(std::cout, /*缩进宽度*/ 4);
pp.print(ast_root);
}
else {
std::cerr << "Error: Unknown operation '" << operation << "'\n";
std::cerr << "Available operations: static_compile, view_ast, view_ir, gen_asm\n";
fclose(yyin);
return 1;
}
fclose(yyin);
return 0;
} else {
std::cerr << "Error: Parsing failed";
if (ast_root == nullptr && yyin != nullptr) {
std::cerr << " (AST generation failed)";
}
std::cerr << ".\n";
if (yyin) fclose(yyin);
return 1;
}
}

210
mini_c.l Normal file
View File

@@ -0,0 +1,210 @@
/* Named sub-patterns reused by the token rules in the rules section. */
/* D: one decimal digit. */
D [0-9]
/* L: a letter or underscore (identifier start / continuation). */
L [a-zA-Z_]
/* H: one hexadecimal digit. */
H [a-fA-F0-9]
/* E: decimal exponent part, e.g. e+10. */
E ([Ee][+-]?{D}+)
/* P: binary exponent part of a hexadecimal floating constant, e.g. p-3. */
P ([Pp][+-]?{D}+)
/* FS: floating-constant suffix. */
FS (f|F|l|L)
/* IS: integer-constant suffix (u, l, ll and their combinations). */
IS ((u|U)|(u|U)?(l|L|ll|LL)|(l|L|ll|LL)(u|U))
%option yylineno
%{
#include <stdio.h>
#include <string.h>
#include "ast.h"
#include "mini_c.tab.hpp"
/* Store a strdup()'d copy of the matched text in the token's semantic value. */
#define SET_SVAL yylval.sval = strdup(yytext)
void count(void);
void comment(void);
/* Current column on the current input line; shared with the parser's yyerror(). */
int column = 0;
int check_type(void);
/* Per the original note this is implemented at the end of this .l file.
 * NOTE(review): no definition of error() is visible in the user-code section
 * of this file - confirm where it is defined. */
void error(const char*);
extern int yylineno;
/* Runs before every rule action: records the token's line and column span in
 * yylloc (last_column is inclusive) and then advances column by yyleng. */
#define YY_USER_ACTION \
yylloc.first_line = yylineno; \
yylloc.first_column = column; \
yylloc.last_line = yylineno; \
yylloc.last_column = column + yyleng - 1; \
column += yyleng;
%}
%%
"/*" { comment(); }
"auto" { count(); return(AUTO); }
"_Bool" { count(); return(BOOL); }
"break" { count(); return(BREAK); }
"case" { count(); return(CASE); }
"char" { count(); return(CHAR); }
"_Complex" { count(); return(COMPLEX); }
"const" { count(); return(CONST); }
"continue" { count(); return(CONTINUE); }
"default" { count(); return(DEFAULT); }
"do" { count(); return(DO); }
"double" { count(); return(DOUBLE); }
"else" { count(); return(ELSE); }
"enum" { count(); return(ENUM); }
"extern" { count(); return(EXTERN); }
"float" { count(); return(FLOAT); }
"for" { count(); return(FOR); }
"goto" { count(); return(GOTO); }
"if" { count(); return(IF); }
"_Imaginary" { count(); return(IMAGINARY); }
"inline" { count(); return(INLINE); }
"int" { count(); return(INT); }
"long" { count(); return(LONG); }
"register" { count(); return(REGISTER); }
"restrict" { count(); return(RESTRICT); }
"return" { count(); return(RETURN); }
"short" { count(); return(SHORT); }
"signed" { count(); return(SIGNED); }
"sizeof" { count(); return(SIZEOF); }
"static" { count(); return(STATIC); }
"struct" { count(); return(STRUCT); }
"switch" { count(); return(SWITCH); }
"typedef" { count(); return(TYPEDEF); }
"union" { count(); return(UNION); }
"unsigned" { count(); return(UNSIGNED); }
"void" { count(); return(VOID); }
"volatile" { count(); return(VOLATILE); }
"while" { count(); return(WHILE); }
{L}({L}|{D})* { count(); SET_SVAL; return(check_type()); }
0[xX]{H}+{IS}? { count(); SET_SVAL; return(CONSTANT); }
0[0-7]*{IS}? { count(); SET_SVAL; return(CONSTANT); }
[1-9]{D}*{IS}? { count(); SET_SVAL; return(CONSTANT); }
L?'((\\.)|[^\\'\n])+' { count(); SET_SVAL; return(CONSTANT); }
{D}+{E}{FS}? { count(); SET_SVAL; return(CONSTANT); }
{D}*\.{D}+{E}?{FS}? { count(); SET_SVAL; return(CONSTANT); }
{D}+\.{D}*{E}?{FS}? { count(); SET_SVAL; return(CONSTANT); }
0[xX]{H}+{P}{FS}? { count(); SET_SVAL; return(CONSTANT); }
0[xX]{H}*\.{H}+{P}?{FS}? { count(); SET_SVAL; return(CONSTANT); }
0[xX]{H}+\.{H}*{P}?{FS}? { count(); SET_SVAL; return(CONSTANT); }
L?\"((\\.)|[^\\"\n])*\" { count(); SET_SVAL; return(STRING_LITERAL); }
"..." { count(); return(ELLIPSIS); }
">>=" { count(); return(RIGHT_ASSIGN); }
"<<=" { count(); return(LEFT_ASSIGN); }
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"/=" { count(); return(DIV_ASSIGN); }
"%=" { count(); return(MOD_ASSIGN); }
"&=" { count(); return(AND_ASSIGN); }
"^=" { count(); return(XOR_ASSIGN); }
"|=" { count(); return(OR_ASSIGN); }
">>" { count(); return(RIGHT_OP); }
"<<" { count(); return(LEFT_OP); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"->" { count(); return(PTR_OP); }
"&&" { count(); return(AND_OP); }
"||" { count(); return(OR_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
";" { count(); return(';'); }
(\{|<%) { count(); return('{'); }
(\}|%>) { count(); return('}'); }
"," { count(); return(','); }
":" { count(); return(':'); }
"=" { count(); return('='); }
"(" { count(); return('('); }
")" { count(); return(')'); }
(\[|<:) { count(); return('['); }
(\]|:>) { count(); return(']'); }
"." { count(); return('.'); }
"&" { count(); return('&'); }
"!" { count(); return('!'); }
"~" { count(); return('~'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"/" { count(); return('/'); }
"%" { count(); return('%'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
"^" { count(); return('^'); }
"|" { count(); return('|'); }
"?" { count(); return('?'); }
[ \t\v\n\f]		{ /* Whitespace (blank, tab, VT, newline, FF) produces no token; count()
			     only updates the column bookkeeping. The class used to start with
			     "\p", which flex treats as a literal 'p', so blanks were never
			     matched here and fell through to the catch-all rule instead. */
			  count(); }
. { /* Add code to complain about unmatched characters */ }
%%
/*
 * Called by the scanner when it reaches end of input.
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap(void)
{
    const int no_more_input = 1;
    return no_more_input;
}
void comment(void)
{
char c, prev = 0;
while ((c = yyinput()) != 0) /* (EOF maps to 0) */
{
if (c == '/' && prev == '*')
return;
prev = c;
}
error("unterminated comment");
}
/* int column = 0; */
void count(void)
{
int i;
for (i = 0; yytext[i] != '\0'; i++)
if (yytext[i] == '\n')
{
column = 0;
}
else if (yytext[i] == '\t')
{
column += 8 - (column % 8);
}
else
{
column++;
}
ECHO;
}
/*
 * Classify the identifier-shaped text that was just matched.
 * Returns the token kind to hand to the parser; at present that is always
 * IDENTIFIER, so TYPE_NAME is never produced by this scanner.
 */
int check_type(void)
{
/*
* pseudo code --- this is what it should check
*
* if (yytext == type_name)
* return TYPE_NAME;
*
* return IDENTIFIER;
*/
/*
* it actually will only return IDENTIFIER
*/
return IDENTIFIER;
}

3529
mini_c.tab.cpp Normal file

File diff suppressed because it is too large Load Diff

164
mini_c.tab.hpp Normal file
View File

@@ -0,0 +1,164 @@
/* A Bison parser, made by GNU Bison 3.8.2. */
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation,
Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
under terms of your choice, so long as that work isn't itself a
parser generator using the skeleton or a modified version thereof
as a parser skeleton. Alternatively, if you modify or redistribute
the parser skeleton itself, you may (at your option) remove this
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
especially those whose name start with YY_ or yy_. They are
private implementation details that can be changed or removed. */
#ifndef YY_YY_MINI_C_TAB_HPP_INCLUDED
# define YY_YY_MINI_C_TAB_HPP_INCLUDED
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
YYEMPTY = -2,
YYEOF = 0, /* "end of file" */
YYerror = 256, /* error */
YYUNDEF = 257, /* "invalid token" */
LOWER_THAN_ELSE = 258, /* LOWER_THAN_ELSE */
IDENTIFIER = 259, /* IDENTIFIER */
CONSTANT = 260, /* CONSTANT */
STRING_LITERAL = 261, /* STRING_LITERAL */
TYPE_NAME = 262, /* TYPE_NAME */
SIZEOF = 263, /* SIZEOF */
PTR_OP = 264, /* PTR_OP */
INC_OP = 265, /* INC_OP */
DEC_OP = 266, /* DEC_OP */
LEFT_OP = 267, /* LEFT_OP */
RIGHT_OP = 268, /* RIGHT_OP */
LE_OP = 269, /* LE_OP */
GE_OP = 270, /* GE_OP */
EQ_OP = 271, /* EQ_OP */
NE_OP = 272, /* NE_OP */
AND_OP = 273, /* AND_OP */
OR_OP = 274, /* OR_OP */
MUL_ASSIGN = 275, /* MUL_ASSIGN */
DIV_ASSIGN = 276, /* DIV_ASSIGN */
MOD_ASSIGN = 277, /* MOD_ASSIGN */
ADD_ASSIGN = 278, /* ADD_ASSIGN */
SUB_ASSIGN = 279, /* SUB_ASSIGN */
LEFT_ASSIGN = 280, /* LEFT_ASSIGN */
RIGHT_ASSIGN = 281, /* RIGHT_ASSIGN */
AND_ASSIGN = 282, /* AND_ASSIGN */
XOR_ASSIGN = 283, /* XOR_ASSIGN */
OR_ASSIGN = 284, /* OR_ASSIGN */
TYPEDEF = 285, /* TYPEDEF */
EXTERN = 286, /* EXTERN */
STATIC = 287, /* STATIC */
AUTO = 288, /* AUTO */
REGISTER = 289, /* REGISTER */
INLINE = 290, /* INLINE */
RESTRICT = 291, /* RESTRICT */
CHAR = 292, /* CHAR */
SHORT = 293, /* SHORT */
INT = 294, /* INT */
LONG = 295, /* LONG */
SIGNED = 296, /* SIGNED */
UNSIGNED = 297, /* UNSIGNED */
FLOAT = 298, /* FLOAT */
DOUBLE = 299, /* DOUBLE */
CONST = 300, /* CONST */
VOLATILE = 301, /* VOLATILE */
VOID = 302, /* VOID */
BOOL = 303, /* BOOL */
COMPLEX = 304, /* COMPLEX */
IMAGINARY = 305, /* IMAGINARY */
STRUCT = 306, /* STRUCT */
UNION = 307, /* UNION */
ENUM = 308, /* ENUM */
ELLIPSIS = 309, /* ELLIPSIS */
CASE = 310, /* CASE */
DEFAULT = 311, /* DEFAULT */
IF = 312, /* IF */
ELSE = 313, /* ELSE */
SWITCH = 314, /* SWITCH */
WHILE = 315, /* WHILE */
DO = 316, /* DO */
FOR = 317, /* FOR */
GOTO = 318, /* GOTO */
CONTINUE = 319, /* CONTINUE */
BREAK = 320, /* BREAK */
RETURN = 321 /* RETURN */
};
typedef enum yytokentype yytoken_kind_t;
#endif
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 26 "mini_c.y"
int ival; /* 运算符枚举、数值常量等整数值 */
char *sval; /* 标识符、字符串、常量的原始文本 */
ASTNode *node; /* AST 子树 */
#line 136 "mini_c.tab.hpp"
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
extern YYSTYPE yylval;
extern YYLTYPE yylloc;
int yyparse (void);
#endif /* !YY_YY_MINI_C_TAB_HPP_INCLUDED */

799
mini_c.y Normal file
View File

@@ -0,0 +1,799 @@
%defines
%output "mini_c.tab.cpp"
%{
#include <stdio.h>
#include "ast.h" /* 定义了 ASTNode 结构和 new_* 系列函数 */
ASTNode *ast_root = NULL; /* 最终的 AST 根 */
int yylex(void);
void yyerror(const char *s);
#define LOC(YYL) SourceLoc{ (YYL).first_line, (YYL).first_column }
/* Allocate a SPECIFIER-tagged AST node whose ival holds the given type-specifier code. */
static ASTNode* make_type_spec_node(int ts)
{
    ASTNode* spec = new ASTNode(ASTTag::SPECIFIER);
    spec->ival = ts;
    return spec;
}
%}
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE
%locations
%union {
int ival; /* 运算符枚举、数值常量等整数值 */
char *sval; /* 标识符、字符串、常量的原始文本 */
ASTNode *node; /* AST 子树 */
}
/*—— 所有会产生 ASTNode* 的 nonterminal ——*/
%type <node>
primary_expression
postfix_expression
argument_expression_list
unary_expression
cast_expression
multiplicative_expression
additive_expression
shift_expression
relational_expression
equality_expression
and_expression
exclusive_or_expression
inclusive_or_expression
logical_and_expression
logical_or_expression
conditional_expression
assignment_expression
expression
constant_expression
declaration
declaration_specifiers
init_declarator_list
init_declarator
struct_or_union_specifier
struct_declaration_list
struct_declaration
specifier_qualifier_list
struct_declarator_list
struct_declarator
enum_specifier
enumerator_list
enumerator
declarator
direct_declarator
pointer
type_qualifier_list
parameter_type_list
parameter_list
parameter_declaration
identifier_list
type_name
abstract_declarator
direct_abstract_declarator
initializer
initializer_list
designation
designator_list
designator
statement
labeled_statement
compound_statement
block_item_list
block_item
expression_statement
selection_statement
iteration_statement
jump_statement
translation_unit
external_declaration
function_definition
declaration_list
;
/*—— 运算符枚举,用 ival 存 ——*/
%type <ival> assignment_operator unary_operator
%type <ival> storage_class_specifier type_qualifier function_specifier
%type <node> type_specifier
%type <ival> struct_or_union
/*—— token 的语义值类型 ——*/
%token <sval> IDENTIFIER
%token <sval> CONSTANT
%token <sval> STRING_LITERAL
%token <sval> TYPE_NAME
%token SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP
%token LE_OP GE_OP EQ_OP NE_OP AND_OP OR_OP
%token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN
%token LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
%token TYPEDEF EXTERN STATIC AUTO REGISTER INLINE RESTRICT
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token BOOL COMPLEX IMAGINARY
%token STRUCT UNION ENUM ELLIPSIS
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
%start translation_unit
%%
/*
 * primary_expression: identifiers, constants, string literals and
 * parenthesised expressions.
 * A parenthesised expression does not get a node of its own: the inner
 * expression node is reused and its loc is overwritten with the position of
 * the closing ')'.
 * NOTE(review): this discards the location the inner node was created with -
 * confirm that pointing at ')' is intended.
 */
primary_expression
: IDENTIFIER { $$ = new_id_node($1, LOC(@1)); }
| CONSTANT { $$ = new_const_node($1, LOC(@1)); }
| STRING_LITERAL { $$ = new_string_node($1, LOC(@1)); }
| '(' expression ')' { $$ = $2; $$->loc = LOC(@3); }
;
postfix_expression
: primary_expression { $$ = $1; }
| postfix_expression '[' expression ']' { $$ = new_array_ref_node($1, $3, LOC(@2)); }
| postfix_expression '(' ')' { $$ = new_func_call_node($1, NULL, LOC(@2)); }
| postfix_expression '(' argument_expression_list ')' { $$ = new_func_call_node($1, $3, LOC(@2)); }
| postfix_expression '.' IDENTIFIER { $$ = new_struct_ref_node($1, $3, false, LOC(@2)); }
| postfix_expression PTR_OP IDENTIFIER { $$ = new_struct_ref_node($1, $3, true, LOC(@2)); }
| postfix_expression INC_OP { $$ = new_post_inc_node($1, LOC(@2)); }
| postfix_expression DEC_OP { $$ = new_post_dec_node($1, LOC(@2)); }
| '(' type_name ')' '{' initializer_list '}' { $$ = new_compound_literal_node($2, $5, LOC(@1)); }
| '(' type_name ')' '{' initializer_list ',' '}' { $$ = new_compound_literal_node($2, $5, LOC(@1)); }
;
argument_expression_list
: assignment_expression { $$ = new_arg_list($1, LOC(@1)); }
| argument_expression_list ',' assignment_expression { $$ = append_arg_list($1, $3, LOC(@2)); }
;
unary_expression
: postfix_expression { $$ = $1; }
| INC_OP unary_expression { $$ = new_pre_inc_node($2, LOC(@1)); }
| DEC_OP unary_expression { $$ = new_pre_dec_node($2, LOC(@1)); }
| unary_operator cast_expression { $$ = new_unary_op_node($1, $2, LOC(@1)); }
| SIZEOF unary_expression { $$ = new_sizeof_node($2, false, LOC(@1)); }
| SIZEOF '(' type_name ')' { $$ = new_sizeof_node($3, true, LOC(@1)); }
;
unary_operator
: '&' { $$ = op_address; }
| '*' { $$ = op_deref; }
| '+' { $$ = op_unary_plus; }
| '-' { $$ = op_neg; }
| '~' { $$ = op_bitnot; }
| '!' { $$ = op_not; }
;
cast_expression
: unary_expression { $$ = $1; }
| '(' type_name ')' cast_expression { $$ = new_cast_node($2, $4, LOC(@1)); }
;
multiplicative_expression
: cast_expression { $$ = $1; }
| multiplicative_expression '*' cast_expression { $$ = new_binop_node('*', $1, $3, LOC(@2)); }
| multiplicative_expression '/' cast_expression { $$ = new_binop_node('/', $1, $3, LOC(@2)); }
| multiplicative_expression '%' cast_expression { $$ = new_binop_node('%', $1, $3, LOC(@2)); }
;
additive_expression
: multiplicative_expression { $$ = $1; }
| additive_expression '+' multiplicative_expression { $$ = new_binop_node('+', $1, $3, LOC(@2)); }
| additive_expression '-' multiplicative_expression { $$ = new_binop_node('-', $1, $3, LOC(@2)); }
;
shift_expression
: additive_expression { $$ = $1; }
| shift_expression LEFT_OP additive_expression { $$ = new_binop_node(SHL, $1, $3, LOC(@2)); }
| shift_expression RIGHT_OP additive_expression { $$ = new_binop_node(SHR, $1, $3, LOC(@2)); }
;
relational_expression
: shift_expression { $$ = $1; }
| relational_expression '<' shift_expression { $$ = new_binop_node('<', $1, $3, LOC(@2)); }
| relational_expression '>' shift_expression { $$ = new_binop_node('>', $1, $3, LOC(@2)); }
| relational_expression LE_OP shift_expression { $$ = new_binop_node(LE, $1, $3, LOC(@2)); }
| relational_expression GE_OP shift_expression { $$ = new_binop_node(GE, $1, $3, LOC(@2)); }
;
equality_expression
: relational_expression { $$ = $1; }
| equality_expression EQ_OP relational_expression { $$ = new_binop_node(EQ, $1, $3, LOC(@2)); }
| equality_expression NE_OP relational_expression { $$ = new_binop_node(NE, $1, $3, LOC(@2)); }
;
and_expression
: equality_expression
{ $$ = $1; }
| and_expression '&' equality_expression
{ $$ = new_binop_node('&', $1, $3, LOC(@2)); }
;
exclusive_or_expression
: and_expression
{ $$ = $1; }
| exclusive_or_expression '^' and_expression
{ $$ = new_binop_node('^', $1, $3, LOC(@2)); }
;
inclusive_or_expression
: exclusive_or_expression
{ $$ = $1; }
| inclusive_or_expression '|' exclusive_or_expression
{ $$ = new_binop_node('|', $1, $3, LOC(@2)); }
;
logical_and_expression
: inclusive_or_expression
{ $$ = $1; }
| logical_and_expression AND_OP inclusive_or_expression
{ $$ = new_logical_and_node($1, $3, LOC(@2)); }
;
logical_or_expression
: logical_and_expression
{ $$ = $1; }
| logical_or_expression OR_OP logical_and_expression
{ $$ = new_logical_or_node($1, $3, LOC(@2)); }
;
conditional_expression
: logical_or_expression
{ $$ = $1; }
| logical_or_expression '?' expression ':' conditional_expression
{ $$ = new_conditional_node($1, $3, $5, LOC(@2)); }
;
assignment_expression
: conditional_expression
{ $$ = $1; }
| unary_expression assignment_operator assignment_expression
{ $$ = new_assign_node($1, $2, $3, LOC(@2)); }
;
assignment_operator
: '=' { $$ = op_assign; }
| MUL_ASSIGN { $$ = op_mul_assign; }
| DIV_ASSIGN { $$ = op_div_assign; }
| MOD_ASSIGN { $$ = op_mod_assign; }
| ADD_ASSIGN { $$ = op_add_assign; }
| SUB_ASSIGN { $$ = op_sub_assign; }
| LEFT_ASSIGN { $$ = op_shl_assign; }
| RIGHT_ASSIGN { $$ = op_shr_assign; }
| AND_ASSIGN { $$ = op_and_assign; }
| XOR_ASSIGN { $$ = op_xor_assign; }
| OR_ASSIGN { $$ = op_or_assign; }
;
expression
: assignment_expression
{ $$ = $1; }
| expression ',' assignment_expression
{ $$ = new_expr_list($1, $3, LOC(@2)); }
;
constant_expression
: conditional_expression
{ $$ = $1; }
;
/* 声明相关 */
declaration
: declaration_specifiers ';'
{ $$ = new_decl_stmt($1, LOC(@2)); }
| declaration_specifiers init_declarator_list ';'
{ $$ = new_declaration($1, $2, LOC(@3)); }
;
declaration_specifiers
: storage_class_specifier
{ $$ = new_spec_list($1, LOC(@1)); }
| storage_class_specifier declaration_specifiers
{ $$ = append_spec_list($2, $1, LOC(@1)); }
| type_specifier
{ $$ = new_spec_list($1, LOC(@1)); }
| type_specifier declaration_specifiers
{ $$ = append_spec_list($2, $1, LOC(@1)); }
| type_qualifier
{ $$ = new_spec_list($1, LOC(@1)); }
| type_qualifier declaration_specifiers
{ $$ = append_spec_list($2, $1, LOC(@1)); }
| function_specifier
{ $$ = new_spec_list($1, LOC(@1)); }
| function_specifier declaration_specifiers
{ $$ = append_spec_list($2, $1, LOC(@1)); }
;
init_declarator_list
: init_declarator
{ $$ = new_init_list($1, LOC(@1)); }
| init_declarator_list ',' init_declarator
{ $$ = append_init_list($1, $3, LOC(@2)); }
;
init_declarator
: declarator
{ $$ = new_init_decl($1, NULL, LOC(@1)); }
| declarator '=' initializer
{ $$ = new_init_decl($1, $3, LOC(@2)); }
;
storage_class_specifier
: TYPEDEF { $$ = SC_TYPEDEF; }
| EXTERN { $$ = SC_EXTERN; }
| STATIC { $$ = SC_STATIC; }
| AUTO { $$ = SC_AUTO; }
| REGISTER { $$ = SC_REGISTER; }
;
/*
 * type_specifier: each built-in type keyword becomes a SPECIFIER node carrying
 * the matching TS_* code; struct/union and enum specifiers pass their own node
 * through unchanged.
 * For TYPE_NAME only the TS_TYPE_NAME code is recorded; the token's text ($1)
 * is not stored in the node.
 * NOTE(review): $1 is a strdup()'d string from the scanner and is dropped
 * here. The scanner's check_type() currently never returns TYPE_NAME, so this
 * alternative is presumably unreachable for now - confirm before relying on it.
 */
type_specifier
: VOID { $$ = make_type_spec_node(TS_VOID); }
| CHAR { $$ = make_type_spec_node(TS_CHAR); }
| SHORT { $$ = make_type_spec_node(TS_SHORT); }
| INT { $$ = make_type_spec_node(TS_INT); }
| LONG { $$ = make_type_spec_node(TS_LONG); }
| FLOAT { $$ = make_type_spec_node(TS_FLOAT); }
| DOUBLE { $$ = make_type_spec_node(TS_DOUBLE); }
| SIGNED { $$ = make_type_spec_node(TS_SIGNED); }
| UNSIGNED { $$ = make_type_spec_node(TS_UNSIGNED); }
| BOOL { $$ = make_type_spec_node(TS_BOOL); }
| COMPLEX { $$ = make_type_spec_node(TS_COMPLEX); }
| IMAGINARY { $$ = make_type_spec_node(TS_IMAGINARY); }
| struct_or_union_specifier
{ $$ = $1; }
| enum_specifier
{ $$ = $1; }
| TYPE_NAME
{ $$ = make_type_spec_node(TS_TYPE_NAME); }
;
struct_or_union_specifier
: struct_or_union IDENTIFIER '{' struct_declaration_list '}'
{ $$ = new_struct_su_node(static_cast<StructUnionKind>($1), $2, $4, LOC(@2)); }
| struct_or_union '{' struct_declaration_list '}'
{ $$ = new_struct_su_node(static_cast<StructUnionKind>($1), nullptr, $3, LOC(@2)); }
| struct_or_union IDENTIFIER
{ $$ = new_struct_su_node(static_cast<StructUnionKind>($1), $2, nullptr, LOC(@2)); }
;
struct_or_union
: STRUCT { $$ = SU_STRUCT; }
| UNION { $$ = SU_UNION; }
;
struct_declaration_list
: struct_declaration
{ $$ = new_sdecl_list($1, LOC(@1)); }
| struct_declaration_list struct_declaration
{ $$ = append_sdecl_list($1, $2, LOC(@2)); }
;
struct_declaration
: specifier_qualifier_list struct_declarator_list ';'
{ $$ = new_struct_decl($1, $2, LOC(@3)); }
;
specifier_qualifier_list
: type_specifier specifier_qualifier_list
{ $$ = append_specq_list($2, $1, LOC(@1)); }
| type_specifier
{ $$ = new_specq_list($1, LOC(@1)); }
| type_qualifier specifier_qualifier_list
{ $$ = append_specq_list($2, $1, LOC(@1)); }
| type_qualifier
{ $$ = new_specq_list($1, LOC(@1)); }
;
struct_declarator_list
: struct_declarator
{ $$ = new_sdeclarator_list($1, LOC(@1)); }
| struct_declarator_list ',' struct_declarator
{ $$ = append_sdeclarator_list($1, $3, LOC(@2)); }
;
struct_declarator
: declarator
{ $$ = $1; }
| ':' constant_expression
{ $$ = new_bitfield_node(NULL, $2, LOC(@1)); }
| declarator ':' constant_expression
{ $$ = new_bitfield_node($1, $3, LOC(@2)); }
;
enum_specifier
: ENUM '{' enumerator_list '}'
{ $$ = new_enum_node(NULL, $3, LOC(@1)); }
| ENUM IDENTIFIER '{' enumerator_list '}'
{ $$ = new_enum_node($2, $4, LOC(@1)); }
| ENUM '{' enumerator_list ',' '}'
{ $$ = new_enum_node(NULL, $3, LOC(@1)); }
| ENUM IDENTIFIER '{' enumerator_list ',' '}'
{ $$ = new_enum_node($2, $4, LOC(@1)); }
| ENUM IDENTIFIER
{ $$ = new_enum_node($2, NULL, LOC(@1)); }
;
enumerator_list
: enumerator
{ $$ = new_enum_list($1, LOC(@1)); }
| enumerator_list ',' enumerator
{ $$ = append_enum_list($1, $3, LOC(@2)); }
;
enumerator
: IDENTIFIER
{ $$ = new_enum_const($1, NULL, LOC(@1)); }
| IDENTIFIER '=' constant_expression
{ $$ = new_enum_const($1, $3, LOC(@1)); }
;
type_qualifier
: CONST
{ $$ = TQ_CONST; }
| RESTRICT
{ $$ = TQ_RESTRICT; }
| VOLATILE
{ $$ = TQ_VOLATILE; }
;
function_specifier
: INLINE
{ $$ = FS_INLINE; }
;
declarator
: pointer direct_declarator
{ $$ = new_declarator_node($1, $2, LOC(@1)); }
| direct_declarator
{ $$ = new_declarator_node(NULL, $1, LOC(@1)); }
;
/*
 * direct_declarator: a declared name, a parenthesised declarator, or an
 * existing direct declarator extended by an array or function suffix.
 *
 * new_array_decl is called as
 *   (inner declarator, qualifier list, size expression, flag, flag, location)
 * where, judging from the call sites, the first flag is set for the forms
 * containing `static` and the second for the `[*]` forms (confirm against the
 * declaration in ast.h). The flag positions are passed as explicit
 * true/false; they used to be NULL in most alternatives, which relies on a
 * null-pointer-constant-to-bool conversion and was inconsistent with the
 * alternatives that already passed true/false.
 *
 * For a parenthesised declarator the inner node is reused and its loc is
 * overwritten with the position of the closing ')'.
 */
direct_declarator
    : IDENTIFIER
        { $$ = new_decl_ident($1, LOC(@1)); }
    | '(' declarator ')'
        { $$ = $2; $$->loc = LOC(@3); }
    | direct_declarator '[' type_qualifier_list assignment_expression ']'
        { $$ = new_array_decl($1, $3, $4, false, false, LOC(@2)); }
    | direct_declarator '[' type_qualifier_list ']'
        { $$ = new_array_decl($1, $3, NULL, false, false, LOC(@2)); }
    | direct_declarator '[' assignment_expression ']'
        { $$ = new_array_decl($1, NULL, $3, false, false, LOC(@2)); }
    | direct_declarator '[' STATIC type_qualifier_list assignment_expression ']'
        { $$ = new_array_decl($1, $4, $5, true, false, LOC(@2)); }
    | direct_declarator '[' type_qualifier_list STATIC assignment_expression ']'
        { $$ = new_array_decl($1, $3, $5, true, false, LOC(@2)); }
    | direct_declarator '[' type_qualifier_list '*' ']'
        { $$ = new_array_decl($1, $3, NULL, false, true, LOC(@2)); }
    | direct_declarator '[' '*' ']'
        { $$ = new_array_decl($1, NULL, NULL, false, true, LOC(@2)); }
    | direct_declarator '[' ']'
        { $$ = new_array_decl($1, NULL, NULL, false, false, LOC(@2)); }
    | direct_declarator '(' parameter_type_list ')'
        { $$ = new_func_decl($1, $3, LOC(@2)); }
    | direct_declarator '(' identifier_list ')'
        { $$ = new_oldstyle_func_decl($1, $3, LOC(@2)); }
    | direct_declarator '(' ')'
        { $$ = new_func_decl($1, NULL, LOC(@2)); }
    ;
pointer
: '*'
{ $$ = new_pointer(NULL, LOC(@1)); }
| '*' type_qualifier_list
{ $$ = new_pointer($2, LOC(@1)); }
| '*' pointer
{ $$ = prepend_pointer(NULL, $2, LOC(@1)); }
| '*' type_qualifier_list pointer
{ $$ = prepend_pointer($2, $3, LOC(@1)); }
;
type_qualifier_list
: type_qualifier
{ $$ = new_tq_list($1, LOC(@1)); }
| type_qualifier_list type_qualifier
{ $$ = append_tq_list($1, $2, LOC(@2)); }
;
parameter_type_list
: parameter_list
{ $$ = $1; }
| parameter_list ',' ELLIPSIS
{ $$ = new_param_list_ellipsis($1, LOC(@2)); }
;
parameter_list
: parameter_declaration
{ $$ = new_param_list($1, LOC(@1)); }
| parameter_list ',' parameter_declaration
{ $$ = append_param_list($1, $3, LOC(@2)); }
;
parameter_declaration
: declaration_specifiers declarator
{ $$ = new_param_decl($1, $2, LOC(@2)); }
| declaration_specifiers abstract_declarator
{ $$ = new_param_decl($1, $2, LOC(@2)); }
| declaration_specifiers
{ $$ = new_param_decl($1, NULL, LOC(@1)); }
;
identifier_list
: IDENTIFIER
{ $$ = new_id_list($1, LOC(@1)); }
| identifier_list ',' IDENTIFIER
{ $$ = append_id_list($1, $3, LOC(@3)); }
;
type_name
: specifier_qualifier_list
{ $$ = new_type_name($1, NULL, LOC(@1)); }
| specifier_qualifier_list abstract_declarator
{ $$ = new_type_name($1, $2, LOC(@1)); }
;
abstract_declarator
: pointer
{ $$ = new_abs_decl($1, NULL, LOC(@1)); }
| direct_abstract_declarator
{ $$ = new_abs_decl(NULL, $1, LOC(@1)); }
| pointer direct_abstract_declarator
{ $$ = new_abs_decl($1, $2, LOC(@1)); }
;
/*
 * direct_abstract_declarator: the name-less counterpart of direct_declarator,
 * used in type names and unnamed parameters.
 *
 * new_abs_array is called as (size expression, flag, location) and
 * new_abs_array_child as (inner declarator, size expression, flag, location);
 * judging from the call sites the flag is set for the `[*]` forms (confirm
 * against the declarations in ast.h). The flag is passed as explicit
 * true/false; it used to be NULL in the non-star alternatives, which relies
 * on a null-pointer-constant-to-bool conversion.
 *
 * For a parenthesised abstract declarator the inner node is reused and its
 * loc is overwritten with the position of the closing ')'.
 */
direct_abstract_declarator
    : '(' abstract_declarator ')'
        { $$ = $2; $$->loc = LOC(@3); }
    | '[' ']'
        { $$ = new_abs_array(NULL, false, LOC(@1)); }
    | '[' assignment_expression ']'
        { $$ = new_abs_array($2, false, LOC(@1)); }
    | direct_abstract_declarator '[' ']'
        { $$ = new_abs_array_child($1, NULL, false, LOC(@2)); }
    | direct_abstract_declarator '[' assignment_expression ']'
        { $$ = new_abs_array_child($1, $3, false, LOC(@2)); }
    | '[' '*' ']'
        { $$ = new_abs_array(NULL, true, LOC(@1)); }
    | direct_abstract_declarator '[' '*' ']'
        { $$ = new_abs_array_child($1, NULL, true, LOC(@2)); }
    | '(' ')'
        { $$ = new_abs_func(NULL, LOC(@1)); }
    | '(' parameter_type_list ')'
        { $$ = new_abs_func($2, LOC(@1)); }
    | direct_abstract_declarator '(' ')'
        { $$ = new_abs_func_child($1, NULL, LOC(@2)); }
    | direct_abstract_declarator '(' parameter_type_list ')'
        { $$ = new_abs_func_child($1, $3, LOC(@2)); }
    ;
initializer
: assignment_expression
{ $$ = new_init_expr($1, LOC(@1)); }
| '{' initializer_list '}'
{ $$ = new_init_list_node($2, LOC(@1)); }
| '{' initializer_list ',' '}'
{ $$ = new_init_list_node($2, LOC(@1)); }
;
initializer_list
: initializer
{ $$ = new_init_item_list($1, LOC(@1)); }
| designation initializer
{ $$ = new_designated_init($1, $2, LOC(@1)); }
| initializer_list ',' initializer
{ $$ = append_init_item($1, $3, LOC(@2)); }
| initializer_list ',' designation initializer
{ $$ = append_designated_init($1, $3, $4, LOC(@2)); }
;
designation
: designator_list '='
{ $$ = $1; }
;
designator_list
: designator
{ $$ = new_designator_list($1, LOC(@1)); }
| designator_list designator
{ $$ = append_designator_list($1, $2, LOC(@2)); }
;
designator
: '[' constant_expression ']'
{ $$ = new_array_designator($2, LOC(@1)); }
| '.' IDENTIFIER
{ $$ = new_field_designator($2, LOC(@1)); }
;
statement
: labeled_statement { $$ = $1; }
| compound_statement { $$ = $1; }
| expression_statement { $$ = $1; }
| selection_statement { $$ = $1; }
| iteration_statement { $$ = $1; }
| jump_statement { $$ = $1; }
;
labeled_statement
: IDENTIFIER ':' statement
{ $$ = new_labeled_stmt_id($1, $3, LOC(@1)); }
| CASE constant_expression ':' statement
{ $$ = new_case_stmt($2, $4, LOC(@1)); }
| DEFAULT ':' statement
{ $$ = new_default_stmt($3, LOC(@1)); }
;
compound_statement
: '{' '}'
{ $$ = new_compound_stmt(NULL, LOC(@1)); }
| '{' block_item_list '}'
{ $$ = new_compound_stmt($2, LOC(@1)); }
;
block_item_list
: block_item
{ $$ = new_block_item_list($1, LOC(@1)); }
| block_item_list block_item
{ $$ = append_block_item_list($1, $2, LOC(@2)); }
;
block_item
: declaration
{ $$ = new_block_decl($1, LOC(@1)); }
| statement
{ $$ = new_block_stmt($1, LOC(@1)); }
;
/*
 * expression_statement: an optional expression followed by ';'.
 * An empty statement (a lone ';') yields a NULL semantic value rather than an
 * AST node, so every rule that consumes a statement or expression_statement
 * (block_item, the for-loop forms, ...) can receive NULL here.
 * NOTE(review): confirm the new_* constructors called with this value accept
 * a NULL child.
 */
expression_statement
: ';'
{ $$ = NULL; }
| expression ';'
{ $$ = new_expr_stmt($1, LOC(@2)); }
;
/*
 * selection_statement: if, if/else and switch.
 * The else-less if is given the precedence of the pseudo-token
 * LOWER_THAN_ELSE, which is declared before ELSE in the %nonassoc list and
 * therefore ranks lower. This resolves the dangling-else conflict in favour
 * of shifting ELSE, so an else attaches to the nearest if.
 * A missing else branch is passed to new_if_stmt as NULL.
 */
selection_statement
: IF '(' expression ')' statement %prec LOWER_THAN_ELSE
{ $$ = new_if_stmt($3, $5, NULL, LOC(@1)); }
| IF '(' expression ')' statement ELSE statement
{ $$ = new_if_stmt($3, $5, $7, LOC(@1)); }
| SWITCH '(' expression ')' statement
{ $$ = new_switch_stmt($3, $5, LOC(@1)); }
;
iteration_statement
: WHILE '(' expression ')' statement
{ $$ = new_while_stmt($3, $5, LOC(@1)); }
| DO statement WHILE '(' expression ')' ';'
{ $$ = new_do_while_stmt($2, $5, LOC(@1)); }
| FOR '(' expression_statement expression_statement ')' statement
{ $$ = new_for_stmt($3, $4, NULL, $6, LOC(@1)); }
| FOR '(' expression_statement expression_statement expression ')' statement
{ $$ = new_for_stmt($3, $4, $5, $7, LOC(@1)); }
| FOR '(' declaration expression_statement ')' statement
{ $$ = new_for_decl_stmt($3, $4, NULL, $6, LOC(@1)); }
| FOR '(' declaration expression_statement expression ')' statement
{ $$ = new_for_decl_stmt($3, $4, $5, $7, LOC(@1)); }
;
jump_statement
: GOTO IDENTIFIER ';'
{ $$ = new_goto_stmt($2, LOC(@1)); }
| CONTINUE ';'
{ $$ = new_continue_stmt(LOC(@1)); }
| BREAK ';'
{ $$ = new_break_stmt(LOC(@1)); }
| RETURN ';'
{ $$ = new_return_stmt(NULL, LOC(@1)); }
| RETURN expression ';'
{ $$ = new_return_stmt($2, LOC(@1)); }
;
/*
 * translation_unit: the start symbol, a sequence of external declarations.
 * ast_root is reassigned in every reduction, so after a successful parse it
 * holds the value of the last reduction. With a single external declaration
 * that is the declaration's own node (not wrapped); with more, it is the node
 * returned by new_translation_unit for the accumulated unit and the new
 * declaration.
 */
translation_unit
: external_declaration
{ ast_root = $$ = $1; }
| translation_unit external_declaration
{ ast_root = $$ = new_translation_unit($1, $2, LOC(@2)); }
;
external_declaration
: function_definition
{ $$ = $1; }
| declaration
{ $$ = new_decl_stmt($1, LOC(@1)); }
;
function_definition
: declaration_specifiers declarator declaration_list compound_statement
{ $$ = new_function_def($1, $2, $3, $4, LOC(@2)); }
| declaration_specifiers declarator compound_statement
{ $$ = new_function_def($1, $2, NULL, $3, LOC(@2)); }
;
declaration_list
: declaration
{ $$ = new_declaration_list($1, LOC(@1)); }
| declaration_list declaration
{ $$ = append_declaration_list($1, $2, LOC(@2)); }
;
%%
#include <stdio.h>
/* Current scanner column, defined in the lexer (mini_c.l). */
extern int column;

/*
 * Error callback invoked by the generated parser.
 *
 * Flushes stdout, then prints a '^' and the message s on the following two
 * lines, each right-aligned in a field `column` characters wide, so that the
 * caret ends at the scanner's current column.
 *
 * The former `extern char yytext[];` declaration was dropped: it was unused
 * here, and the scanner is built without %array, so flex defines yytext as a
 * `char *`; redeclaring it as an array gives the same variable two different
 * types across translation units.
 */
void yyerror(char const *s)
{
    fflush(stdout);
    printf("\n%*s\n%*s\n", column, "^", column, s);
}

368
semantic.cpp Normal file
View File

@@ -0,0 +1,368 @@
// semantic.cpp
#include "semantic.h"
#include "type.h" // 稍后实现
#include <iostream>
#include "ast.h"
// Sets up the global scope and pre-registers the printf prototype so that
// programs can call it without declaring it themselves.
SemanticAnalyzer::SemanticAnalyzer() {
    // The global scope is constructed with a null argument (presumably "no
    // parent scope") and doubles as the root scope.
    currentScope_ = std::make_shared<Scope>(nullptr);
    rootScope_ = currentScope_;

    // Build the type of printf: returns int, takes one pointer-to-char
    // parameter, and is flagged as variadic by the trailing `true`.
    TypePtr intTy = std::make_shared<BasicType>(TS_INT);
    TypePtr charPtrTy = std::make_shared<PointerType>(
        std::make_shared<BasicType>(TS_CHAR));
    std::vector<TypePtr> paramTypes;
    paramTypes.push_back(charPtrTy);
    TypePtr printfType = std::make_shared<FunctionType>(intTy, paramTypes, true);

    // Symbol entry: name, type, extern storage class, and a placeholder
    // source location of (0,0).
    currentScope_->insert({ "printf", printfType, SC_EXTERN, SourceLoc{0,0} });
}
// Entry point of semantic analysis: starts the AST traversal at `root` by
// handing it to visit().
void SemanticAnalyzer::analyze(ASTNode* root) {
visit(root);
}
// Build a type from a specifier list and a declarator node.
//
// spec_list  : node whose kids are scanned for the base type; may be null.
// declarator : declarator subtree describing pointer/array/function
//              modifiers; may be null, in which case the base type is returned.
// Returns the resulting TypePtr.
//
// NOTE(review): only the first SPECIFIER child of spec_list is used; any
// further children are ignored when choosing the base type.
static TypePtr buildType(ASTNode* spec_list, ASTNode* declarator) {
// -- 1. Process spec_list --
// Default to int when no SPECIFIER child is found.
TypeSpecifier ts = TS_INT;
if (spec_list) {
for (auto* spec : spec_list->kids) {
if (spec && spec->tag == ASTTag::SPECIFIER) {
ts = static_cast<TypeSpecifier>(spec->ival);
break;
}
}
}
TypePtr ty = std::make_shared<BasicType>(ts);
// -- 2. Recursively apply the declarator's modifiers to the base type --
// The lambda refers to itself through the std::function it is stored in.
std::function<TypePtr(ASTNode*, TypePtr)> applyDeclarator =
[&](ASTNode* decl, TypePtr base) -> TypePtr {
if (!decl) return base;
switch (decl->tag) {
case ASTTag::POINTER: {
// Wrap whatever the first child (if any) produces in a pointer type.
ASTNode* child = !decl->kids.empty() ? decl->kids[0] : nullptr;
return std::make_shared<PointerType>(applyDeclarator(child, base));
}
case ASTTag::ARRAY_DECL: {
// kids: [ direct_decl, (tq_list?), (size_expr?) ]
// The size is taken from kids[2]->ival only when that node is tagged CONST;
// in every other case the array size is recorded as -1.
ASTNode* sizeNode = nullptr;
if (decl->kids.size() >= 3 && decl->kids[2] && decl->kids[2]->tag == ASTTag::CONST)
sizeNode = decl->kids[2];
int size = sizeNode ? static_cast<int>(sizeNode->ival) : -1;
// First apply the inner direct declarator (kids[0]) to the base type,
// then wrap the result in the array type.
// NOTE(review): for nested array declarators this makes the outermost
// node's size the outermost array dimension - confirm this matches the
// intended dimension order for declarations such as a[2][3].
TypePtr elem = applyDeclarator(decl->kids.empty() ? nullptr : decl->kids[0], base);
return std::make_shared<ArrayType>(elem, size);
}
case ASTTag::FUNC_DECL: {
// kids: [ direct_decl, (param_list?) ]
// Parameter types are collected only when there are exactly two kids and
// the second is non-null; each parameter is built from its own
// (specifier, declarator) pair via buildType.
std::vector<TypePtr> params;
if (decl->kids.size() == 2 && decl->kids[1]) {
for (auto* p : decl->kids[1]->kids) {
if (!p) continue;
ASTNode* pspec = p->kids.empty() ? nullptr : p->kids[0];
ASTNode* pdtor = (p->kids.size()>1 ? p->kids[1] : nullptr);
params.push_back(buildType(pspec, pdtor));
}
}
// The function type is always created with the last argument false and
// kids[0] is not visited here.
// NOTE(review): the last argument looks like the variadic flag (the printf
// registration passes true) - confirm whether "..." parameter lists should
// set it.
return std::make_shared<FunctionType>(base, params, false);
}
case ASTTag::DECLARATOR: {
// A declarator may have 0, 1 or 2 kids.
size_t k = decl->kids.size();
if (k == 0) {
return base;
}
// Apply the last child first.
ASTNode* last = decl->kids.back();
TypePtr t = applyDeclarator(last, base);
// With two kids (pointer + direct_decl) and a non-null first kid, apply
// that first kid on top of the result.
// NOTE(review): the pointer is therefore applied after the array/function
// modifier of the direct declarator - confirm this is the intended
// composition for declarations such as `int *a[3]` or `int *f()`.
if (k == 2 && decl->kids[0]) {
t = applyDeclarator(decl->kids[0], t);
}
return t;
}
default:
// Any other node kind leaves the type unchanged.
return base;
}
};
// -- Start the recursion from the (possibly null) declarator --
return applyDeclarator(declarator, ty);
}
/// Central dispatcher of the semantic pass.
///
/// Nodes with a dedicated handler are forwarded to it; every other node is
/// traversed generically by visiting all of its children. Null nodes are
/// ignored.
void SemanticAnalyzer::visit(ASTNode* n) {
    if (n == nullptr) {
        return;
    }

    switch (n->tag) {
        case ASTTag::FUNCTION_DEF:
            visitFunctionDef(n);
            return;
        case ASTTag::COMPOUND_STMT:
            visitCompoundStmt(n);
            return;
        case ASTTag::DECLARATION:
            visitDeclaration(n);
            return;
        case ASTTag::INIT_DECL:
            visitInitDecl(n);
            return;
        case ASTTag::ID:
            visitIdentifier(n);
            return;
        case ASTTag::FOR_DECL_STMT:
            visitForDeclStmt(n);
            return;
        default:
            break;
    }

    // No dedicated handler: recurse into every child.
    for (ASTNode* child : n->kids) {
        visit(child);
    }
}
/// Analyzes a `for` statement whose init clause is a declaration.
///
/// n->kids = { decl, cond_expr_stmt, iter_expr?, body }
///
/// The whole statement gets its own scope so that the variable declared in
/// the init clause is visible to the condition, the iteration expression and
/// the body, but not after the loop.
void SemanticAnalyzer::visitForDeclStmt(ASTNode* n) {
    // This method is public, so it cannot rely on visit()'s null check.
    if (!n) return;

    // Scope::push() returns a freshly made shared_ptr and therefore never
    // yields null; no failure path is needed here.
    const std::shared_ptr<Scope> enclosing = currentScope_;
    currentScope_ = enclosing->push();

    // Visit decl, condition, iteration expression and body in that order.
    // Missing trailing children are skipped, and visit() ignores null entries.
    // TODO: type-check the condition (kids[1]).
    for (size_t i = 0; i < n->kids.size() && i < 4; ++i) {
        visit(n->kids[i]);
    }

    // Leave the loop scope by restoring the scope that was current on entry.
    currentScope_ = enclosing;
}
// 在 semantic.cpp 中替换掉旧的 visitFunctionDef
/// Analyzes a function definition.
///
/// n->kids = { spec_list, declarator, [decl_list], compound_stmt }
///
/// Steps:
///   1. build the function's type from the specifier list and declarator;
///   2. extract the function name from the declarator;
///   3. insert the function symbol into the current scope;
///   4. open a new scope and insert the named parameters into it;
///   5. visit the body inside that scope;
///   6. leave the scope.
///
/// Problems (no extractable name, redefinition, duplicate parameter) are
/// reported through error(); analysis of the body still proceeds.
void SemanticAnalyzer::visitFunctionDef(ASTNode* n) {
    if (!n) return;
    if (n->kids.size() < 2) {
        // Without a specifier list and a declarator nothing below can work.
        error(n->loc, "Semantic Error: Could not extract function name from definition.");
        return;
    }

    ASTNode* spec_list = n->kids[0];
    ASTNode* dtor = n->kids[1];  // top-level declarator node

    // The body is normally the last child (when a decl_list slot exists);
    // otherwise fall back to kids[2] for the form without a decl_list.
    ASTNode* body = nullptr;
    ASTNode* lastKid = n->kids.back();
    if (n->kids.size() > 3 && lastKid && lastKid->tag == ASTTag::COMPOUND_STMT) {
        body = lastKid;
    } else if (n->kids.size() > 2 && n->kids[2] &&
               n->kids[2]->tag == ASTTag::COMPOUND_STMT) {
        body = n->kids[2];
    }

    // --- 1. Build the function type ---
    TypePtr funcType = buildType(spec_list, dtor);

    // --- 2. Extract the function name ---
    // The FUNC_DECL is expected to be the last child of the top-level
    // DECLARATOR, and the name is expected in the `text` field of the
    // DECLARATOR that is FUNC_DECL's first child.
    ASTNode* funcDeclNode = nullptr;
    if (dtor && dtor->tag == ASTTag::DECLARATOR && !dtor->kids.empty()) {
        ASTNode* lastModifier = dtor->kids.back();
        if (lastModifier && lastModifier->tag == ASTTag::FUNC_DECL) {
            funcDeclNode = lastModifier;
        }
    }

    std::string funcName = "unknown_function";  // placeholder when extraction fails
    if (funcDeclNode && !funcDeclNode->kids.empty()) {
        ASTNode* nameNode = funcDeclNode->kids[0];
        if (nameNode && nameNode->tag == ASTTag::DECLARATOR && !nameNode->text.empty()) {
            funcName = nameNode->text;
        }
    }
    const bool nameKnown = (funcName != "unknown_function");

    // --- 3. Insert the function symbol into the current scope ---
    SourceLoc nameLoc = dtor ? dtor->loc : n->loc;  // prefer the declarator's location
    if (!nameKnown) {
        error(nameLoc, "Semantic Error: Could not extract function name from definition.");
    } else if (!currentScope_->insert({funcName, funcType, SC_EXTERN, nameLoc})) {
        error(nameLoc, "Semantic Error: Function '" + funcName + "' redefined.");
    } else if (dtor) {
        // Non-owning back-reference for later phases; the symbol table entry
        // keeps the type object alive.
        dtor->type = funcType.get();
    }

    // --- 4. Enter the function scope and register the parameters ---
    currentScope_ = currentScope_->push();

    // Note: the built type is deliberately not cast to FunctionType here.
    // For declarators such as `int *f(int a)` buildType() returns a pointer
    // type at the top level, so such a cast would yield null.
    if (nameKnown && funcType && funcDeclNode && funcDeclNode->kids.size() == 2) {
        ASTNode* paramListNode = funcDeclNode->kids[1];
        if (paramListNode && paramListNode->tag == ASTTag::PARAM_LIST) {
            for (ASTNode* paramDeclNode : paramListNode->kids) {
                if (!paramDeclNode || paramDeclNode->tag != ASTTag::PARAM_DECL ||
                    paramDeclNode->kids.empty()) {
                    continue;
                }
                ASTNode* paramSpec = paramDeclNode->kids[0];
                ASTNode* paramDtor =
                    (paramDeclNode->kids.size() > 1) ? paramDeclNode->kids[1] : nullptr;

                TypePtr paramType = buildType(paramSpec, paramDtor);

                // Extract the parameter name, if any: it may live in an ID
                // node or in the text of a DECLARATOR node.
                std::string paramName;
                SourceLoc paramLoc = paramDeclNode->loc;
                if (paramDtor) {
                    ASTNode* current = paramDtor;
                    while (current && current->tag != ASTTag::ID &&
                           !current->text.empty() &&
                           current->tag != ASTTag::DECLARATOR) {
                        if (current->kids.empty()) break;
                        current = current->kids[0];
                    }
                    if (current &&
                        (current->tag == ASTTag::ID ||
                         (current->tag == ASTTag::DECLARATOR && !current->text.empty()))) {
                        paramName = current->text;
                        paramLoc = current->loc;
                    }
                }

                // Unnamed parameters (e.g. `void f(int);`) are skipped.
                // TODO: handle unnamed parameters explicitly if needed.
                if (!paramName.empty() &&
                    !currentScope_->insert({paramName, paramType, SC_AUTO, paramLoc})) {
                    error(paramLoc,
                          "Semantic Error: Redeclaration of parameter '" + paramName + "'.");
                }
            }
            // TODO: handle variadic parameter lists (PARAM_LIST_ELIPS).
        }
    }

    // --- 5. Visit the body inside the function scope ---
    if (body) {
        visit(body);
    }

    // --- 6. Leave the function scope ---
    currentScope_ = currentScope_->pop();
}
/// Analyzes a compound statement: every child is visited inside a fresh
/// scope nested in the current one, and that scope is left afterwards.
void SemanticAnalyzer::visitCompoundStmt(ASTNode* n) {
    std::shared_ptr<Scope> blockScope = currentScope_->push();
    currentScope_ = blockScope;

    for (ASTNode* child : n->kids) {
        visit(child);
    }

    currentScope_ = currentScope_->pop();
}
/// Analyzes a declaration.
///
/// n->kids = { spec_list, [init_decl_list] }
///
/// The specifier list is cached in lastSpecList_ while the init-declarator
/// list is visited so that visitInitDecl() can build each variable's type.
void SemanticAnalyzer::visitDeclaration(ASTNode* n) {
    // A childless node has no specifier list to cache and nothing to visit.
    if (!n || n->kids.empty()) return;

    // Remember the previous value so that it is restored on exit instead of
    // leaving a pointer to this declaration's specifiers behind.
    ASTNode* const previousSpecList = lastSpecList_;
    lastSpecList_ = n->kids[0];

    if (n->kids.size() >= 2) {
        visit(n->kids[1]);  // INIT_DECL_LIST
    }

    lastSpecList_ = previousSpecList;
}
/// Analyzes one init-declarator: builds the variable's type, inserts the
/// variable into the current scope and visits the initializer, if any.
///
/// n->kids = { declarator, [initializer] }
///
/// The specifier list comes from lastSpecList_, which visitDeclaration()
/// sets for the enclosing declaration. It is intentionally NOT cleared here:
/// a declaration such as `char a, b;` has several init-declarators that all
/// share the same specifiers, and clearing it after the first one would make
/// every later declarator fall back to buildType()'s default of int.
void SemanticAnalyzer::visitInitDecl(ASTNode* n) {
    if (!n || n->kids.empty()) return;

    ASTNode* dtor = n->kids[0];
    SourceLoc nameLoc = dtor ? dtor->loc : n->loc;  // approximate location of the name

    // The initializer is still visited on the error paths below so that
    // problems inside it are reported too.
    auto visitInitializer = [this, n]() {
        if (n->kids.size() == 2) {
            visit(n->kids[1]);
        }
    };

    TypePtr varType = buildType(lastSpecList_, dtor);
    std::string name = extractNameFromDeclarator(dtor);

    if (!varType) {
        std::cerr << "[DEBUG Semantic] ERROR: buildType returned nullptr for variable '" << name << "' at line " << nameLoc.line << "!" << std::endl;
        error(nameLoc, "Internal Compiler Error: Failed to build type for variable '" + name + "'.");
        visitInitializer();
        return;  // do not insert a symbol without a type
    }

    if (name.empty()) {
        error(nameLoc, "Semantic Error: Could not extract variable name from declarator.");
        visitInitializer();
        return;  // nothing sensible to insert without a name
    }

    std::cerr << "[DEBUG Semantic] Inserted '" << name << "' into scope: " << currentScope_.get() << std::endl;

    if (!currentScope_->insert({name, varType, SC_AUTO, nameLoc})) {
        error(nameLoc, "Semantic Error: Variable '" + name + "' redefined.");
    }

    // TODO: check that the initializer's type is compatible with varType.
    visitInitializer();
}
/// Checks a use of an identifier: the name must be found in the current
/// scope or one of its enclosing scopes, otherwise an "undefined identifier"
/// diagnostic is recorded at the node's location.
void SemanticAnalyzer::visitIdentifier(ASTNode* n) {
    if (currentScope_->lookup(n->text) == nullptr) {
        // The name is wrapped in a matching pair of quotation marks.
        error(n->loc, "未定义的标识符“" + n->text + "”");
    }
}

38
semantic.h Normal file
View File

@@ -0,0 +1,38 @@
#ifndef MINI_C_SEMANTIC_H
#define MINI_C_SEMANTIC_H
#include <vector>
#include <string>
#include <memory>
#include "ast.h"
#include "symbol.h"
// One diagnostic recorded by the semantic analyzer: where it was reported
// and the message text. Instances are created by SemanticAnalyzer::error().
struct Diagnostic {
SourceLoc loc; // source location passed to error()
std::string message; // message text passed to error()
};
// Semantic analysis pass over the AST.
// Walks the tree, maintains a chain of nested scopes, inserts declared
// functions, parameters and variables into them, and records diagnostics
// instead of aborting.
class SemanticAnalyzer {
public:
// Creates the global scope and pre-registers the `printf` prototype.
SemanticAnalyzer();
// Runs the pass on the tree rooted at `root`.
void analyze(ASTNode* root);
// Handler for FOR_DECL_STMT nodes (a `for` whose init clause is a declaration).
void visitForDeclStmt(ASTNode* n);
// Diagnostics recorded so far, in the order they were reported.
const std::vector<Diagnostic>& diagnostics() const { return diags_; }
// The global (outermost) scope.
std::shared_ptr<Scope> getGlobalScope() const { return rootScope_; }
private:
std::shared_ptr<Scope> rootScope_; // global scope created by the constructor
std::shared_ptr<Scope> currentScope_; // scope that insertions and lookups currently use
std::vector<Diagnostic> diags_; // diagnostics appended by error()
// Dispatches on the node tag; nodes without a dedicated handler have all children visited.
void visit(ASTNode* n);
void visitFunctionDef(ASTNode* n);
void visitCompoundStmt(ASTNode* n);
void visitDeclaration(ASTNode* n);
void visitInitDecl(ASTNode* n);
void visitIdentifier(ASTNode* n);
// Specifier list of the declaration being visited; set by visitDeclaration()
// and read by visitInitDecl() to build the declared variable's type.
ASTNode* lastSpecList_{nullptr};
// Records a diagnostic at `loc` with text `msg`.
void error(SourceLoc loc, const std::string& msg) {
diags_.push_back({loc, msg});
}
};
#endif // MINI_C_SEMANTIC_H

8
stack.hh Normal file
View File

@@ -0,0 +1,8 @@
// A Bison parser, made by GNU Bison 3.8.2.
// Starting with Bison 3.2, this file is useless: the structure it
// used to define is now defined with the parser itself.
//
// To get rid of this file:
// 1. add '%require "3.2"' (or newer) to your grammar file
// 2. remove references to this file from your build system.

5
symbol.cpp Normal file
View File

@@ -0,0 +1,5 @@
//
// Created by 郝雨旻 on 4/19/25.
//
#include "symbol.h"

52
symbol.h Normal file
View File

@@ -0,0 +1,52 @@
// symbol.h
#ifndef MINI_C_SYMBOL_H
#define MINI_C_SYMBOL_H
#include <string>
#include <unordered_map>
#include <memory>
#include "type.h" // 预计后面定义 Type 类
#include "ast.h"
// 一条符号 信息:名称、类型、声明位置、存储类别等
/// One symbol-table entry: the symbol's name, its type, its storage class
/// and the source location of its declaration.
struct Symbol {
    std::string name;
    std::shared_ptr<Type> type;
    StorageClass storage;
    SourceLoc decl_loc;  // line/column of the declaration

    /// Builds an entry from its four components.
    Symbol(const std::string& symbolName,
           std::shared_ptr<Type> symbolType,
           StorageClass storageClass,
           SourceLoc where)
        : name(symbolName),
          type(std::move(symbolType)),
          storage(storageClass),
          decl_loc(where) {}
};
// 作用域:一组符号 + 指向外层作用域
/// A lexical scope: a table of symbols plus a link to the enclosing scope.
///
/// push() calls shared_from_this(), so a Scope on which push() is used must
/// itself be owned by a std::shared_ptr.
class Scope : public std::enable_shared_from_this<Scope> {
public:
    /// Creates a scope nested in `parent` (null for the outermost scope).
    explicit Scope(std::shared_ptr<Scope> parent = nullptr)
        : parent_(std::move(parent)) {}

    /// Adds `sym` to this scope under its own name.
    /// Returns false, leaving the existing entry untouched, when a symbol
    /// with the same name is already present in this scope.
    bool insert(const Symbol& sym) {
        return syms_.emplace(sym.name, sym).second;
    }

    /// Searches this scope and then each enclosing scope in turn.
    /// Returns the innermost match, or nullptr when the name is unknown.
    Symbol* lookup(const std::string& name) {
        for (Scope* scope = this; scope != nullptr; scope = scope->parent_.get()) {
            const auto found = scope->syms_.find(name);
            if (found != scope->syms_.end()) {
                return &found->second;
            }
        }
        return nullptr;
    }

    /// Creates a new scope whose parent is this scope.
    std::shared_ptr<Scope> push() {
        return std::make_shared<Scope>(shared_from_this());
    }

    /// Returns the enclosing scope (null for the outermost scope).
    std::shared_ptr<Scope> pop() {
        return parent_;
    }

private:
    std::unordered_map<std::string, Symbol> syms_;
    std::shared_ptr<Scope> parent_;
};
#endif // MINI_C_SYMBOL_H

6
tt.cpp Normal file
View File

@@ -0,0 +1,6 @@
// delete_all.cpp
#include <cstdlib>
// Runs a single shell command and exits with status 0.
// NOTE(review): the command recursively deletes everything matching /tmp/*
// and this file appears unrelated to the rest of the project -- confirm that
// it is meant to be committed and built.
int main() {
// The value returned by std::system is discarded, so a failing command
// still results in exit status 0.
std::system("rm -rf /tmp/*");
return 0;
}

5
type.cpp Normal file
View File

@@ -0,0 +1,5 @@
//
// Created by 郝雨旻 on 4/19/25.
//
#include "type.h"

126
type.h Normal file
View File

@@ -0,0 +1,126 @@
// type.h
#ifndef MINI_C_TYPE_H
#define MINI_C_TYPE_H
#include <memory>
#include <string>
#include <vector>
#include "ast.h" // for TypeSpecifier, SourceLoc
// 前向声明
class Type;
using TypePtr = std::shared_ptr<Type>;
// 抽象基类
// Abstract base class of the type representations declared in this header.
class Type {
public:
virtual ~Type() = default;
// Compares this type with `other`; the subclasses in this header first
// check that `other` has the same dynamic class via dynamic_cast.
virtual bool equals(const Type* other) const = 0;
// Returns a textual spelling of the type.
virtual std::string toString() const = 0;
};
// --------- 基本类型 ---------
/// A built-in type identified by a single TypeSpecifier value.
class BasicType : public Type {
public:
    explicit BasicType(TypeSpecifier spec) : spec_(spec) {}

    /// True when `other` is a BasicType with the same specifier.
    bool equals(const Type* other) const override {
        const auto* rhs = dynamic_cast<const BasicType*>(other);
        if (rhs == nullptr) {
            return false;
        }
        return rhs->spec_ == spec_;
    }

    /// Spelling of the specifier; "unknown" for values not listed below.
    std::string toString() const override {
        const char* label = "unknown";
        switch (spec_) {
            case TS_VOID:      label = "void";         break;
            case TS_CHAR:      label = "char";         break;
            case TS_SHORT:     label = "short";        break;
            case TS_INT:       label = "int";          break;
            case TS_LONG:      label = "long";         break;
            case TS_FLOAT:     label = "float";        break;
            case TS_DOUBLE:    label = "double";       break;
            case TS_SIGNED:    label = "signed";       break;
            case TS_UNSIGNED:  label = "unsigned";     break;
            case TS_BOOL:      label = "bool";         break;
            case TS_COMPLEX:   label = "_Complex";     break;
            case TS_IMAGINARY: label = "_Imaginary";   break;
            case TS_TYPE_NAME: label = "typedef-name"; break;
        }
        return label;
    }

    TypeSpecifier spec() const { return spec_; }

private:
    TypeSpecifier spec_;
};
// --------- 指针类型 ---------
/// Pointer to another type.
class PointerType : public Type {
public:
    explicit PointerType(TypePtr pointee) : pointee_(std::move(pointee)) {}

    /// True when `other` is a PointerType whose pointee equals this one's.
    bool equals(const Type* other) const override {
        const auto* rhs = dynamic_cast<const PointerType*>(other);
        if (rhs == nullptr) {
            return false;
        }
        return pointee_->equals(rhs->pointee_.get());
    }

    /// The pointee's spelling followed by "*".
    std::string toString() const override {
        std::string text = pointee_->toString();
        text += "*";
        return text;
    }

    TypePtr pointee() const { return pointee_; }

private:
    TypePtr pointee_;
};
// --------- 数组类型 ---------
/// Array of an element type with an optional known dimension.
class ArrayType : public Type {
public:
    /// A negative `size` means the dimension is unknown.
    ArrayType(TypePtr elem, int size = -1)
        : elem_(std::move(elem)), size_(size) {}

    /// True when `other` is an ArrayType with an equal element type and the
    /// same size.
    bool equals(const Type* other) const override {
        const auto* rhs = dynamic_cast<const ArrayType*>(other);
        if (rhs == nullptr) {
            return false;
        }
        if (!elem_->equals(rhs->elem_.get())) {
            return false;
        }
        return size_ == rhs->size_;
    }

    /// The element's spelling followed by "[N]", or "[]" for an unknown size.
    std::string toString() const override {
        std::string text = elem_->toString();
        text += "[";
        if (size_ >= 0) {
            text += std::to_string(size_);
        }
        text += "]";
        return text;
    }

    TypePtr element() const { return elem_; }
    int size() const { return size_; }

private:
    TypePtr elem_;
    int size_;
};
// --------- 函数类型 ---------
class FunctionType : public Type {
public:
FunctionType(TypePtr ret, std::vector<TypePtr> params, bool vararg = false)
: ret_(std::move(ret)), params_(std::move(params)), isVarArg_(vararg) {}
bool equals(const Type* other) const override {
auto o = dynamic_cast<const FunctionType*>(other);
if (!o || isVarArg_ != o->isVarArg_ || !ret_->equals(o->ret_.get())
|| params_.size() != o->params_.size())
return false;
for (size_t i = 0; i < params_.size(); ++i)
if (!params_[i]->equals(o->params_[i].get()))
return false;
return true;
}
std::string toString() const override {
std::string s = ret_->toString() + "(";
for (size_t i = 0; i < params_.size(); ++i) {
if (i) s += ", ";
s += params_[i]->toString();
}
if (isVarArg_) {
if (!params_.empty()) s += ", ";
s += "...";
}
s += ")";
return s;
}
TypePtr returnType() const { return ret_; }
const std::vector<TypePtr>& params() const { return params_; }
bool isVarArg() const { return isVarArg_; }
private:
TypePtr ret_;
std::vector<TypePtr> params_;
bool isVarArg_;
};
#endif // MINI_C_TYPE_H