Files
PCCCompiler/PCCcompiler/codegen.cpp
2026-02-15 12:18:38 +01:00

545 lines
23 KiB
C++

#include "codegen.h"
#include <iostream>
#include <sstream>
#include <vector>
// Helper: reports whether `s` is a plain decimal integer literal, i.e. an
// optional leading '-' followed by one or more ASCII digits.
//
// Returns false for the empty string and for a lone "-" (a sign with no
// digits is not a number). No whitespace, '+', decimal point or exponent is
// accepted.
bool isNumber(const std::string& s) {
    const size_t start = (!s.empty() && s[0] == '-') ? 1 : 0;

    // Nothing left after the optional sign: "" or "-".
    if (start == s.length()) return false;

    for (size_t i = start; i < s.length(); i++) {
        // Plain range compare instead of isdigit(): passing a negative char
        // to isdigit() is undefined behaviour, and only ASCII digits are
        // meaningful in generated assembly anyway.
        const char c = s[i];
        if (c < '0' || c > '9') return false;
    }
    return true;
}
// Translates an IR operand name into the text of an assembly operand.
//
// `name`   - operand as it appears in the IR; leading/trailing spaces and
//            tabs are ignored.
// `locals` - map from local variable name to its positive offset below rbp.
//
// Returns, in order of precedence:
//   "0"          when the operand is empty or consists only of whitespace,
//   the literal  when the operand is an integer literal (see isNumber),
//   "eax"        when the operand is the pseudo-register "RAX",
//   "[rbp-N]"    when the operand is a key of `locals` with offset N,
//   the trimmed operand itself otherwise (e.g. a label name).
//
// "this" gets no special treatment: it is looked up in `locals` like any
// other name.
std::string getVarLocation(const std::string& name, const std::map<std::string, int>& locals) {
    // Trim spaces and tabs. If there is no non-blank character at all the
    // operand is treated exactly like an empty one.
    const size_t first = name.find_first_not_of(" \t");
    if (first == std::string::npos) return "0";
    const size_t last = name.find_last_not_of(" \t");
    const std::string cleanName = name.substr(first, last - first + 1);

    if (isNumber(cleanName)) return cleanName;
    if (cleanName == "RAX") return "eax";

    // Single lookup instead of count() followed by at().
    const auto slot = locals.find(cleanName);
    if (slot != locals.end()) {
        return "[rbp-" + std::to_string(slot->second) + "]";
    }

    // Not a known local: hand the name back unchanged (e.g. a label).
    return cleanName;
}
std::string generateAssembly(const CompilerState& state) {
// 1. NAG£ÓWEK I DEKLARACJE EXTERN
std::string result = "default rel\n";
result += "global main\n";
result += "extern printf\n";
result += "extern getchar\n";
result += "extern _getch\n";
result += "extern rand\n";
result += "extern srand\n";
result += "extern time\n";
result += "extern MessageBoxA\n";
// 2. SEKCJA DATA (Tylko raz!)
result += "section .data\n";
result += " fmt_int db \"%lld\", 10, 0\n"; // Format dla liczb
result += " fmt_str db \"%s\", 10, 0\n"; // Format dla stringów
// Zrzucamy stringi: ETYKIETA db "TRESC", 0
for (const auto& p : state.stringLiterals) {
result += " " + p.first + " db \"" + p.second + "\", 0\n";
}
// 3. SEKCJA TEXT (Kod programu)
result += "section .text\n";
for (const auto& pair : state.functions) {
const Function& func = pair.second;
result += func.name + ":\n";
result += " push rbp\n";
result += " mov rbp, rsp\n";
result += " sub rsp, 512\n"; // Zwiêkszy³em stos dla bezpieczeñstwa obiektów
std::map<std::string, int> stackMap;
int currentStack = 8;
// ARGUMENTY FUNKCJI
// RCX, RDX, R8, R9 - konwencja Windows x64 (shadow space obs³uguje caller)
// Jeœli funkcja jest metod¹, pierwszym argumentem jest 'this' (wskaŸnik na obiekt)
int argIdx = 0;
if (func.args.size() > 0) {
stackMap[func.args[0]] = currentStack;
result += " mov [rbp-" + std::to_string(currentStack) + "], rcx ; arg " + func.args[0] + "\n";
currentStack += 8;
argIdx++;
}
if (func.args.size() > 1) {
stackMap[func.args[1]] = currentStack;
result += " mov [rbp-" + std::to_string(currentStack) + "], rdx ; arg " + func.args[1] + "\n";
currentStack += 8;
argIdx++;
}
if (func.args.size() > 2) {
stackMap[func.args[2]] = currentStack;
result += " mov [rbp-" + std::to_string(currentStack) + "], r8 ; arg " + func.args[2] + "\n";
currentStack += 8;
argIdx++;
}
// GENEROWANIE INSTRUKCJI
for (const auto& instr : func.instructions) {
// Rezerwacja miejsca na stosie dla zmiennych
bool isWriteOp = (instr.type == OpType::ASSIGN ||
instr.type == OpType::ADD ||
instr.type == OpType::EQ ||
instr.type == OpType::SUB ||
instr.type == OpType::MUL ||
instr.type == OpType::DIV ||
instr.type == OpType::MOD ||
instr.type == OpType::LOGIC_AND ||
instr.type == OpType::LOGIC_OR ||
instr.type == OpType::MSGBOX ||
instr.type == OpType::ARRAY_DECLARE ||
instr.type == OpType::ARRAY_SET ||
instr.type == OpType::ALLOC_OBJECT ||
instr.type == OpType::LOAD_FIELD);
if (isWriteOp && stackMap.find(instr.arg1) == stackMap.end() && instr.arg1 != "RAX") {
stackMap[instr.arg1] = currentStack;
// Dla obiektów i tablic alokujemy wiêcej miejsca
if (instr.type == OpType::ALLOC_OBJECT) {
int size = std::stoi(instr.arg2);
currentStack += size;
}
else if (instr.type == OpType::ARRAY_DECLARE) {
int size = std::stoi(instr.arg2);
currentStack += (size * 8);
}
else {
currentStack += 8; // domyœlnie zmienna int/ptr
}
}
switch (instr.type) {
case OpType::ASSIGN: {
// Jeœli Ÿród³em jest wynik funkcji (RAX), nie generujemy "mov rax, eax"
if (instr.arg2 == "RAX") {
std::string dst = getVarLocation(instr.arg1, stackMap);
// POPRAWIONE: Tylko dwa backslashe, tak jak w reszcie kodu
result += " mov " + dst + ", rax\n";
}
else if (instr.arg3.find("ARRAY_IDX:") == 0) {
std::string indexStr = instr.arg3.substr(10);
std::string arrName = instr.arg2;
std::string dst = getVarLocation(instr.arg1, stackMap);
int baseOffset = stackMap[arrName];
if (isNumber(indexStr)) result += " mov rcx, " + indexStr + "\\n";
else result += " mov rcx, " + getVarLocation(indexStr, stackMap) + "\\n";
result += " imul rcx, 8\\n";
result += " mov rdx, rbp\\n";
result += " sub rdx, " + std::to_string(baseOffset) + "\\n";
result += " sub rdx, rcx\\n";
result += " mov rax, [rdx]\\n";
result += " mov " + dst + ", rax\\n";
}
else if (instr.arg3 == "STRING") {
std::string src = instr.arg2;
result += " lea rax, [rel " + src + "]\\n";
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov qword " + dst + ", rax\\n";
}
else {
std::string srcLoc = getVarLocation(instr.arg2, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
std::string src = instr.arg2;
if (isNumber(src)) result += " mov rax, " + src + "\\n";
else result += " mov rax, " + srcLoc + "\\n";
result += " mov " + dst + ", rax\\n";
}
break;
}
case OpType::ALLOC_OBJECT: {
// Miejsce na stosie zosta³o zarezerwowane wy¿ej w pêtli (currentStack += size)
// Mo¿emy opcjonalnie wyzerowaæ pamiêæ (memset), ale na razie pomijamy dla prostoty
// Komentarz w ASM
result += " ; Alloc Object " + instr.arg1 + " size: " + instr.arg2 + "\n";
break;
}
case OpType::STORE_FIELD: {
// STORE_FIELD objName, offset, value
std::string objName = instr.arg1;
int offset = std::stoi(instr.arg2);
std::string valStr = instr.arg3;
// 1. Gdzie jest obiekt? (jego baza)
// Obiekt na stosie zaczyna siê pod [RBP - stackMap[objName]]
// Pola s¹ kolejne w dó³ stosu (bo stos roœnie w dó³, ale struktura ma dodatnie offsety...
// W C lokalne struktury: &obj to najni¿szy adres.
// U nas stackMap[obj] to "górny" adres (pierwsze zarezerwowane 8 bajtów).
// Przyjmijmy: adres_pola = (RBP - stackMap[objName]) - offset
// Pobierz wartoϾ do zapisania
if (isNumber(valStr)) {
result += " mov rax, " + valStr + "\n";
}
else {
result += " mov rax, " + getVarLocation(valStr, stackMap) + "\n";
}
int baseOffset = 0;
// SprawdŸ czy objName to "this"
if (objName == "this") {
// "this" jest wskaŸnikiem! Trzeba go za³adowaæ
std::string thisPtrLoc = getVarLocation("this", stackMap);
result += " mov rdx, " + thisPtrLoc + "\n"; // RDX = adres obiektu
// Adres pola = RDX - offset (tutaj uwaga: jeœli alokujemy na stosie "w dó³", to pola maj¹ ujemne offsety wzglêdem bazy?)
// Zróbmy proœciej: w 'ALLOC_OBJECT' rezerwujemy blok.
// [RBP - base] to pocz¹tek (pole 0).
// [RBP - base - 8] to pole 1 (offset 8).
// Czyli adres = RBP - base - offset.
// ALE: "this" przekazany do funkcji to wskaŸnik na ten obszar w pamiêci.
// Jeœli przekazujemy adres zmiennej lokalnej (LEA), to wskaŸnik pokazuje na [RBP-base].
// Wiêc [RDX - offset] powinno zadzia³aæ.
result += " sub rdx, " + std::to_string(offset) + "\n";
result += " mov [rdx], rax\n";
}
else {
// Obiekt lokalny na stosie
baseOffset = stackMap[objName];
result += " mov rdx, rbp\n";
result += " sub rdx, " + std::to_string(baseOffset) + "\n";
result += " sub rdx, " + std::to_string(offset) + "\n";
result += " mov [rdx], rax\n";
}
break;
}
case OpType::LOAD_FIELD: {
// LOAD_FIELD destVar, objName, offset
std::string destVar = instr.arg1; // gdzie zapisaæ wynik
std::string objName = instr.arg2; // sk¹d czytaæ
int offset = std::stoi(instr.arg3); // offset pola
// 1. Oblicz adres pola
if (objName == "this") {
std::string thisPtrLoc = getVarLocation("this", stackMap);
result += " mov rdx, " + thisPtrLoc + "\n";
result += " sub rdx, " + std::to_string(offset) + "\n";
}
else {
int baseOffset = stackMap[objName];
result += " mov rdx, rbp\n";
result += " sub rdx, " + std::to_string(baseOffset) + "\n";
result += " sub rdx, " + std::to_string(offset) + "\n";
}
// 2. Pobierz wartoϾ
result += " mov rax, [rdx]\n";
// 3. Zapisz do zmiennej docelowej
std::string destLoc = getVarLocation(destVar, stackMap);
result += " mov " + destLoc + ", rax\n";
break;
}
case OpType::ADD: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " add eax, " + op2 + "\n";
result += " mov " + dst + ", eax\n";
break;
}
case OpType::SUB: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " sub eax, " + op2 + "\n";
result += " mov " + dst + ", eax\n";
break;
}
case OpType::MUL: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " imul eax, " + op2 + "\n";
result += " mov " + dst + ", eax\n";
break;
}
case OpType::DIV: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " cdq\n";
if (isdigit(op2[0]) || op2[0] == '-') {
result += " mov ecx, " + op2 + "\n";
result += " idiv ecx\n";
}
else {
result += " idiv dword " + op2 + "\n";
}
result += " mov " + dst + ", eax\n";
break;
}
case OpType::MOD: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " cdq\n";
if (isdigit(op2[0]) || op2[0] == '-') {
result += " mov ecx, " + op2 + "\n";
result += " idiv ecx\n";
}
else {
result += " idiv dword " + op2 + "\n";
}
result += " mov " + dst + ", edx\n";
break;
}
case OpType::EQ: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " cmp eax, " + op2 + "\n";
result += " sete al\n";
result += " movzx eax, al\n";
result += " mov " + dst + ", eax\n";
break;
}
case OpType::LOGIC_AND: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " cmp eax, 0\n";
result += " setne al\n";
if (isdigit(op2[0])) result += " mov ecx, " + op2 + "\n";
else result += " mov ecx, " + op2 + "\n";
result += " cmp ecx, 0\n";
result += " setne cl\n";
result += " and al, cl\n";
result += " movzx eax, al\n";
result += " mov " + dst + ", eax\n";
break;
}
case OpType::LOGIC_OR: {
std::string op1 = getVarLocation(instr.arg2, stackMap);
std::string op2 = getVarLocation(instr.arg3, stackMap);
std::string dst = getVarLocation(instr.arg1, stackMap);
result += " mov eax, " + op1 + "\n";
result += " cmp eax, 0\n";
result += " setne al\n";
if (isdigit(op2[0])) result += " mov ecx, " + op2 + "\n";
else result += " mov ecx, " + op2 + "\n";
result += " cmp ecx, 0\n";
result += " setne cl\n";
result += " or al, cl\n";
result += " movzx eax, al\n";
result += " mov " + dst + ", eax\n";
break;
}
case OpType::JMP_FALSE: {
std::string condRaw = instr.arg2;
std::string cond = getVarLocation(condRaw, stackMap);
result += " mov eax, " + cond + "\n";
result += " test eax, eax\n";
result += " je " + instr.arg1 + "\n";
break;
}
case OpType::JMP: {
result += " jmp " + instr.arg1 + "\n";
break;
}
case OpType::ARRAY_DECLARE: {
// Obs³u¿one przy alokacji stosu
break;
}
case OpType::ARRAY_SET: {
std::string arrName = instr.arg1;
std::string indexStr = instr.arg2;
std::string valStr = instr.arg3;
if (isNumber(valStr)) result += " mov rax, " + valStr + "\n";
else {
std::string valLoc = getVarLocation(valStr, stackMap);
result += " mov rax, " + valLoc + "\n";
}
int baseOffset = stackMap[arrName];
if (isNumber(indexStr)) result += " mov rcx, " + indexStr + "\n";
else {
std::string idxLoc = getVarLocation(indexStr, stackMap);
result += " mov rcx, " + idxLoc + "\n";
}
result += " imul rcx, 8\n";
result += " mov rdx, rbp\n";
result += " sub rdx, " + std::to_string(baseOffset) + "\n";
result += " sub rdx, rcx\n";
result += " mov [rdx], rax\n";
break;
}
case OpType::LABEL: {
result += instr.arg1 + ":\n";
break;
}
case OpType::PRINT: {
std::string val = getVarLocation(instr.arg1, stackMap);
result += " mov edx, " + val + "\n";
result += " lea rcx, [rel fmt_int]\n";
result += " xor eax, eax\n";
result += " call printf\n";
break;
}
case OpType::PRINT_STRING: {
std::string target = instr.arg1;
if (target.rfind("str_", 0) == 0) {
result += " lea rdx, [rel " + target + "]\n";
}
else {
std::string val = getVarLocation(target, stackMap);
result += " mov rdx, " + val + "\n";
}
result += " lea rcx, [rel fmt_str]\n";
result += " xor eax, eax\n";
result += " call printf\n";
break;
}
case OpType::CALL: {
if (instr.arg1 == "input" || instr.arg1 == "read_key") {
result += " call _getch\n";
break;
}
if (instr.arg1 == "sys_seed") {
result += " mov rcx, 0\n";
result += " call time\n";
result += " mov rcx, rax\n";
result += " call srand\n";
break;
}
if (instr.arg1 == "sys_rand") {
result += " call rand\n";
break;
}
// Call metody/funkcji
std::string funcName = instr.arg1;
std::string argsRaw = instr.arg2;
std::vector<std::string> callArgs;
if (!argsRaw.empty()) {
std::stringstream ss(argsRaw);
std::string segment;
while (std::getline(ss, segment, ',')) {
callArgs.push_back(segment);
}
}
// Przygotowanie argumentów dla Windows x64 (RCX, RDX, R8, R9)
// Argument 0 (RCX) - ewentualnie 'this'
if (callArgs.size() > 0) {
// Czy to 'this' (nazwa obiektu)?
std::string arg0 = callArgs[0];
if (state.varTypes.count(arg0) && state.classes.count(state.varTypes.at(arg0))) {
// Przekazujemy ADRES obiektu (pointer)
// Obiekt jest na stosie: [RBP - offset]
// Adres to: RBP - offset
int offset = stackMap.at(arg0);
result += " lea rcx, [rbp-" + std::to_string(offset) + "]\n";
}
else if (arg0 == "this") {
// Przekazujemy this dalej
result += " mov rcx, [rbp+16]\n"; // zak³adaj¹c ¿e this jest w shadow space? nie, my go kopiujemy na stos
// Wróæmy do logiki argumentów: argumenty funkcji s¹ kopiowane na stos lokalny.
// arg0 ("this") jest w stackMap["this"].
std::string loc = getVarLocation("this", stackMap);
result += " mov rcx, " + loc + "\n";
}
else {
// Zwyk³a zmienna / liczba
std::string val = getVarLocation(arg0, stackMap);
if (isNumber(val)) result += " mov rcx, " + val + "\n";
else result += " movsxd rcx, dword " + val + "\n";
}
}
if (callArgs.size() > 1) {
std::string val = getVarLocation(callArgs[1], stackMap);
if (isNumber(val)) result += " mov rdx, " + val + "\n";
else result += " movsxd rdx, dword " + val + "\n";
}
// ... (dalsze argumenty R8, R9 jeœli potrzebujesz)
result += " call " + funcName + "\n";
break;
}
case OpType::RETURN: {
std::string val = getVarLocation(instr.arg1, stackMap);
if (!val.empty() && val != ";") {
result += " mov eax, " + val + "\n";
}
result += " leave\n";
result += " ret\n";
break;
}
case OpType::MSGBOX: {
std::string title = instr.arg1;
std::string text = instr.arg2;
if (text.find("str_") == 0) result += " lea rdx, [rel " + text + "]\n";
else {
std::string loc = getVarLocation(text, stackMap);
result += " mov rdx, " + loc + "\n";
}
if (title.find("str_") == 0) result += " lea r8, [rel " + title + "]\n";
else {
std::string loc = getVarLocation(title, stackMap);
result += " mov r8, " + loc + "\n";
}
result += " mov rcx, 0\n";
result += " mov r9, 0\n";
result += " call MessageBoxA\n";
break;
}
}
}
if (func.returnType == "void") {
result += " leave\n ret\n";
}
result += "\n";
}
return result;
}