commit 192146b99db363403de855ff565f1cd3054a0d3c Author: SirYwell Date: Mon May 5 00:22:11 2025 +0200 Initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..96469d2 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# Starter Code: Java + +This project contains starter code written in Java 24. +It contains: + +- A lexer for L1 +- A parser for L1 +- Semantic analysis for L1 +- SSA translation and IR +- Code generation for an abstract assembly + +Furthermore, the starter code also provides working `build.sh` and `run.sh` files. + +## Code Overview + +The starter code is meant to spare you some initial work on things that are not covered +by the lecture at the time of the first lab. +You will most likely need to touch large parts of the existing code sooner or later, +so we recommend going through it for a basic understanding of what is going on. + +Remember that you are free to modify any code. + +### Lexer & Tokens + +The lexer lazily produces tokens from an input string. +Invalid input parts will generate `ErrorToken`s. + +### Parser & AST + +The parser is a handwritten, recursive-descent parser. +You can choose other technologies (e.g., ANTLR), but expanding this parser as needed +might be a good exercise to deepen your understanding. + +The parser does not implement any kind of error recovery. +Instead, it just throws an exception as soon as the first problem is encountered. +You can implement error recovery, but it is not mandatory. + +### Semantic Analysis + +The semantic analysis in Lab 1 is just very basic. +You will need to expand it in future labs. +Similar to the parser, error handling is only very basic. + +### SSA translation & IR + +The SSA IR is inspired by [libFirm](https://libfirm.github.io/) and [Sea-of-Nodes](https://github.com/SeaOfNodes/). +It might be helpful to study these to get a better understanding of what is going on. +The implementation also showcases how SSA translation can directly apply optimizations. 
+ +In the first lab, you don't need to understand SSA in full detail. +However, register allocation on chordal graphs depends on SSA. +For Lab 1, register allocation can also be done just using the AST, +but that means you'll likely have to rewrite more code in future labs. +It can still make sense to start with simple, naive implementations to have something working early on. + +### Code generation + +This is more or less just a placeholder. +You most likely just want to fully replace it with your register allocation and instruction selection. + +## Debugging Utilities + +There is a chance something won't work on the first try. +To figure out the cause, we provide utilities that ease debugging. + +- `edu.kit.kastel.vads.compiler.parser.Printer` allows printing the AST. + As it inserts many parentheses, it can be helpful when debugging precedence problems. +- `edu.kit.kastel.vads.compiler.ir.util.GraphVizPrinter` can generate output in the DOT format. + There are online tools (e.g., https://magjac.com/graphviz-visual-editor/) that can visualize that output. + It allows debugging anything related to the IR. + +We also try to keep track of source positions as much as possible through the compiler. +You can get rid of all that, but it can be helpful to track down where something comes from. + +## Miscellaneous + +### Nullability + +This project uses [jspecify](https://jspecify.dev/). +The `module-info.java` is annotated with `@NullMarked`, +meaning uses of `null` must be annotated, and not-null is assumed otherwise. + +### Gradle + +This project provides the wrapper for Gradle 8.14. +Additionally, the `application` plugin is used to easily specify the main class and build ready-to-use executables. +To ease setup ceremony, +the `foojay-resolver-convention` is used to automatically download a JDK matching the toolchain configuration. 
\ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..b3f652d --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,30 @@ +plugins { + id("java") + application +} + +group = "edu.kit.kastel.logic" +version = "1.0-SNAPSHOT" + +application { + mainModule = "edu.kit.kastel.vads.compiler" + mainClass = "edu.kit.kastel.vads.compiler.Main" +} + +repositories { + mavenCentral() +} + +dependencies { + implementation("org.jspecify:jspecify:1.0.0") + testImplementation(platform("org.junit:junit-bom:5.10.0")) + testImplementation("org.junit.jupiter:junit-jupiter") +} + +java { + toolchain.languageVersion = JavaLanguageVersion.of(24) +} + +tasks.test { + useJUnitPlatform() +} \ No newline at end of file diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..7b6c3d0 --- /dev/null +++ b/build.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env sh +set -e +cd "$(dirname "$0")" +./gradlew --no-daemon installDist diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..1b33c55 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..ca025c8 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..23d15a9 --- /dev/null +++ b/gradlew @@ -0,0 +1,251 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. 
+# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH="\\\"\\\"" + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! 
-x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..5eed7ee --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,94 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if %ERRORLEVEL% equ 0 goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..0374615
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env sh
+# Starts the launcher script found under build/install/compiler/bin next to
+# this file and forwards all arguments to it unchanged.
+BIN_DIR="$(dirname "$0")/build/install/compiler/bin"
+# quoted so that a checkout path containing spaces is not word-split
+"$BIN_DIR/compiler" "$@"
diff --git a/settings.gradle.kts b/settings.gradle.kts
new file mode 100644
index 0000000..040499b
--- /dev/null
+++ b/settings.gradle.kts
@@ -0,0 +1,5 @@
+rootProject.name = "compiler" // in case you want to change the name: it is used by run.sh too
+
+plugins {
+    id("org.gradle.toolchains.foojay-resolver-convention") version "0.10.0"
+}
\ No newline at end of file
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/Main.java b/src/main/java/edu/kit/kastel/vads/compiler/Main.java
new file mode 100644
index 0000000..ac9f909
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/Main.java
@@ -0,0 +1,68 @@
+package edu.kit.kastel.vads.compiler;
+
+import edu.kit.kastel.vads.compiler.backend.aasm.CodeGenerator;
+import edu.kit.kastel.vads.compiler.ir.IrGraph;
+import edu.kit.kastel.vads.compiler.ir.SsaTranslation;
+import edu.kit.kastel.vads.compiler.ir.optimize.LocalValueNumbering;
+import edu.kit.kastel.vads.compiler.lexer.Lexer;
+import edu.kit.kastel.vads.compiler.parser.ParseException;
+import edu.kit.kastel.vads.compiler.parser.Parser;
+import edu.kit.kastel.vads.compiler.parser.TokenSource;
+import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree;
+import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree;
+import edu.kit.kastel.vads.compiler.semantic.SemanticAnalysis;
+import edu.kit.kastel.vads.compiler.semantic.SemanticException;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+/// Command line entry point: reads the input file, runs lexer, parser and
+/// semantic analysis, translates every function to SSA form and writes the
+/// generated abstract assembly to the output file.
+/// Exits with code 3 for invalid arguments, 1 for parse errors and 2 for
+/// semantic errors.
+public class Main {
+    public static void main(String[] args) throws IOException {
+        if (args.length != 2) {
+            System.err.println("Invalid arguments: Expected one input file and one output file");
+            System.exit(3);
+        }
+        Path input = Path.of(args[0]);
+        Path output = Path.of(args[1]);
+        ProgramTree program = lexAndParse(input);
+        try {
+            new SemanticAnalysis(program).analyze();
+        } catch (SemanticException e) {
+            e.printStackTrace();
+            System.exit(2);
+            return;
+        }
+        List<IrGraph> graphs = new ArrayList<>();
+        for (FunctionTree function : program.topLevelTrees()) {
+            SsaTranslation translation = new SsaTranslation(function, new LocalValueNumbering());
+            graphs.add(translation.translate());
+        }
+
+        // TODO: generate assembly and invoke gcc instead of generating abstract assembly
+        String s = new CodeGenerator().generateCode(graphs);
+        Files.writeString(output, s);
+    }
+
+    /// Lexes and parses the given file; prints the error and exits with code 1
+    /// if the input cannot be parsed.
+    private static ProgramTree lexAndParse(Path input) throws IOException {
+        try {
+            Lexer lexer = Lexer.forString(Files.readString(input));
+            TokenSource tokenSource = new TokenSource(lexer);
+            Parser parser = new Parser(tokenSource);
+            return parser.parseProgram();
+        } catch (ParseException e) {
+            e.printStackTrace();
+            System.exit(1);
+            throw new AssertionError("unreachable");
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/Position.java b/src/main/java/edu/kit/kastel/vads/compiler/Position.java
new file mode 100644
index 0000000..a9fffcc
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/Position.java
@@ -0,0
+1,13 @@
+package edu.kit.kastel.vads.compiler;
+
+public sealed interface Position {
+    int line();
+    int column();
+
+    record SimplePosition(int line, int column) implements Position {
+        @Override
+        public String toString() {
+            return line() + ":" + column();
+        }
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/Span.java b/src/main/java/edu/kit/kastel/vads/compiler/Span.java
new file mode 100644
index 0000000..fb877ca
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/Span.java
@@ -0,0 +1,20 @@
+package edu.kit.kastel.vads.compiler;
+
+public sealed interface Span {
+    Position start();
+    Position end();
+
+    Span merge(Span later);
+
+    record SimpleSpan(Position start, Position end) implements Span {
+        @Override
+        public Span merge(Span later) {
+            return new SimpleSpan(start(), later.end());
+        }
+
+        @Override
+        public String toString() {
+            return "[" + start() + "|" + end() + "]";
+        }
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/AasmRegisterAllocator.java b/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/AasmRegisterAllocator.java
new file mode 100644
index 0000000..759f644
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/AasmRegisterAllocator.java
@@ -0,0 +1,46 @@
+package edu.kit.kastel.vads.compiler.backend.aasm;
+
+import edu.kit.kastel.vads.compiler.backend.regalloc.Register;
+import edu.kit.kastel.vads.compiler.backend.regalloc.RegisterAllocator;
+import edu.kit.kastel.vads.compiler.ir.IrGraph;
+import edu.kit.kastel.vads.compiler.ir.node.Block;
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+import edu.kit.kastel.vads.compiler.ir.node.ProjNode;
+import edu.kit.kastel.vads.compiler.ir.node.ReturnNode;
+import edu.kit.kastel.vads.compiler.ir.node.StartNode;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/// Assigns a fresh `VirtualRegister` to every node except projection, start,
+/// block and return nodes. Registers are numbered in the order in which a
+/// depth-first walk over the predecessors, starting at the end block, finishes.
+public class AasmRegisterAllocator implements RegisterAllocator {
+    private int id;
+    private final Map<Node, Register> registers = new HashMap<>();
+
+    @Override
+    public Map<Node, Register> allocateRegisters(IrGraph graph) {
+        Set<Node> visited = new HashSet<>();
+        visited.add(graph.endBlock());
+        scan(graph.endBlock(), visited);
+        return Map.copyOf(this.registers);
+    }
+
+    private void scan(Node node, Set<Node> visited) {
+        for (Node predecessor : node.predecessors()) {
+            if (visited.add(predecessor)) {
+                scan(predecessor, visited);
+            }
+        }
+        if (needsRegister(node)) {
+            this.registers.put(node, new VirtualRegister(this.id++));
+        }
+    }
+
+    private static boolean needsRegister(Node node) {
+        return !(node instanceof ProjNode || node instanceof StartNode || node instanceof Block || node instanceof ReturnNode);
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/CodeGenerator.java b/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/CodeGenerator.java
new file mode 100644
index 0000000..7a9b177
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/CodeGenerator.java
@@ -0,0 +1,97 @@
+package edu.kit.kastel.vads.compiler.backend.aasm;
+
+import edu.kit.kastel.vads.compiler.backend.regalloc.Register;
+import edu.kit.kastel.vads.compiler.ir.IrGraph;
+import edu.kit.kastel.vads.compiler.ir.node.AddNode;
+import edu.kit.kastel.vads.compiler.ir.node.BinaryOperationNode;
+import edu.kit.kastel.vads.compiler.ir.node.Block;
+import edu.kit.kastel.vads.compiler.ir.node.ConstIntNode;
+import edu.kit.kastel.vads.compiler.ir.node.DivNode;
+import edu.kit.kastel.vads.compiler.ir.node.ModNode;
+import edu.kit.kastel.vads.compiler.ir.node.MulNode;
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+import edu.kit.kastel.vads.compiler.ir.node.Phi;
+import edu.kit.kastel.vads.compiler.ir.node.ProjNode;
+import edu.kit.kastel.vads.compiler.ir.node.ReturnNode;
+import edu.kit.kastel.vads.compiler.ir.node.StartNode;
+import edu.kit.kastel.vads.compiler.ir.node.SubNode;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static edu.kit.kastel.vads.compiler.ir.util.NodeSupport.predecessorSkipProj;
+
+/// Placeholder backend that prints every graph as abstract assembly, one
+/// instruction per line, using the registers from `AasmRegisterAllocator`.
+public class CodeGenerator {
+
+    /// Generates the abstract assembly for all given graphs, one `function` block per graph.
+    public String generateCode(List<IrGraph> program) {
+        StringBuilder builder = new StringBuilder();
+        for (IrGraph graph : program) {
+            if (!builder.isEmpty()) {
+                // keep the "}" of the previous function and the next "function" on separate lines
+                builder.append("\n");
+            }
+            AasmRegisterAllocator allocator = new AasmRegisterAllocator();
+            Map<Node, Register> registers = allocator.allocateRegisters(graph);
+            builder.append("function ")
+                .append(graph.name())
+                .append(" {\n");
+            generateForGraph(graph, builder, registers);
+            builder.append("}");
+        }
+        return builder.toString();
+    }
+
+    private void generateForGraph(IrGraph graph, StringBuilder builder, Map<Node, Register> registers) {
+        Set<Node> visited = new HashSet<>();
+        scan(graph.endBlock(), visited, builder, registers);
+    }
+
+    /// Emits code for all not yet visited predecessors first, then for the node itself.
+    private void scan(Node node, Set<Node> visited, StringBuilder builder, Map<Node, Register> registers) {
+        for (Node predecessor : node.predecessors()) {
+            if (visited.add(predecessor)) {
+                scan(predecessor, visited, builder, registers);
+            }
+        }
+
+        switch (node) {
+            case AddNode add -> binary(builder, registers, add, "add");
+            case SubNode sub -> binary(builder, registers, sub, "sub");
+            case MulNode mul -> binary(builder, registers, mul, "mul");
+            case DivNode div -> binary(builder, registers, div, "div");
+            case ModNode mod -> binary(builder, registers, mod, "mod");
+            case ReturnNode r -> builder.repeat(" ", 2).append("ret ")
+                .append(registers.get(predecessorSkipProj(r, ReturnNode.RESULT)));
+            case ConstIntNode c -> builder.repeat(" ", 2)
+                .append(registers.get(c))
+                .append(" = const ")
+                .append(c.value());
+            case Phi _ -> throw new UnsupportedOperationException("phi");
+            case Block _, ProjNode _, StartNode _ -> {
+                // do nothing, skip line break
+                return;
+            }
+        }
+        builder.append("\n");
+    }
+
+    private static void binary(
+        StringBuilder builder,
+        Map<Node, Register> registers,
+        BinaryOperationNode node,
+        String opcode
+    ) {
+        builder.repeat(" ", 2).append(registers.get(node))
+            .append(" = ")
+            .append(opcode)
+            .append(" ")
+            .append(registers.get(predecessorSkipProj(node, BinaryOperationNode.LEFT)))
+            .append(" ")
+            .append(registers.get(predecessorSkipProj(node, BinaryOperationNode.RIGHT)));
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/VirtualRegister.java b/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/VirtualRegister.java
new file mode 100644
index 0000000..0121a6d
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/backend/aasm/VirtualRegister.java
@@ -0,0 +1,10 @@
+package edu.kit.kastel.vads.compiler.backend.aasm;
+
+import edu.kit.kastel.vads.compiler.backend.regalloc.Register;
+
+public record VirtualRegister(int id) implements Register {
+    @Override
+    public String toString() {
+        return "%" + id();
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/backend/regalloc/Register.java b/src/main/java/edu/kit/kastel/vads/compiler/backend/regalloc/Register.java
new file mode 100644
index 0000000..426fc5e
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/backend/regalloc/Register.java
@@ -0,0 +1,4 @@
+package edu.kit.kastel.vads.compiler.backend.regalloc;
+
+public interface Register {
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/backend/regalloc/RegisterAllocator.java b/src/main/java/edu/kit/kastel/vads/compiler/backend/regalloc/RegisterAllocator.java
new file mode 100644
index 0000000..210d6b6
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/backend/regalloc/RegisterAllocator.java
@@ -0,0 +1,11 @@
+package edu.kit.kastel.vads.compiler.backend.regalloc;
+
+import edu.kit.kastel.vads.compiler.ir.IrGraph;
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+
+import java.util.Map;
+
+public interface RegisterAllocator {
+
+    Map<Node, Register> allocateRegisters(IrGraph graph);
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/GraphConstructor.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/GraphConstructor.java
new file mode 100644
index 0000000..66f8e10
--- /dev/null
+++
b/src/main/java/edu/kit/kastel/vads/compiler/ir/GraphConstructor.java @@ -0,0 +1,192 @@ +package edu.kit.kastel.vads.compiler.ir; + +import edu.kit.kastel.vads.compiler.ir.node.AddNode; +import edu.kit.kastel.vads.compiler.ir.node.Block; +import edu.kit.kastel.vads.compiler.ir.node.ConstIntNode; +import edu.kit.kastel.vads.compiler.ir.node.DivNode; +import edu.kit.kastel.vads.compiler.ir.node.ModNode; +import edu.kit.kastel.vads.compiler.ir.node.MulNode; +import edu.kit.kastel.vads.compiler.ir.node.Node; +import edu.kit.kastel.vads.compiler.ir.node.Phi; +import edu.kit.kastel.vads.compiler.ir.node.ProjNode; +import edu.kit.kastel.vads.compiler.ir.node.ReturnNode; +import edu.kit.kastel.vads.compiler.ir.node.StartNode; +import edu.kit.kastel.vads.compiler.ir.node.SubNode; +import edu.kit.kastel.vads.compiler.ir.optimize.Optimizer; +import edu.kit.kastel.vads.compiler.parser.symbol.Name; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +class GraphConstructor { + + private final Optimizer optimizer; + private final IrGraph graph; + private final Map> currentDef = new HashMap<>(); + private final Map> incompletePhis = new HashMap<>(); + private final Map currentSideEffect = new HashMap<>(); + private final Map incompleteSideEffectPhis = new HashMap<>(); + private final Set sealedBlocks = new HashSet<>(); + private Block currentBlock; + + public GraphConstructor(Optimizer optimizer, String name) { + this.optimizer = optimizer; + this.graph = new IrGraph(name); + this.currentBlock = this.graph.startBlock(); + // the start block never gets any more predecessors + sealBlock(this.currentBlock); + } + + public Node newStart() { + assert currentBlock() == this.graph.startBlock() : "start must be in start block"; + return new StartNode(currentBlock()); + } + + public Node newAdd(Node left, Node right) { + return this.optimizer.transform(new AddNode(currentBlock(), left, right)); + } + public Node newSub(Node left, Node 
right) { + return this.optimizer.transform(new SubNode(currentBlock(), left, right)); + } + + public Node newMul(Node left, Node right) { + return this.optimizer.transform(new MulNode(currentBlock(), left, right)); + } + + public Node newDiv(Node left, Node right) { + return this.optimizer.transform(new DivNode(currentBlock(), left, right, readCurrentSideEffect())); + } + + public Node newMod(Node left, Node right) { + return this.optimizer.transform(new ModNode(currentBlock(), left, right, readCurrentSideEffect())); + } + + public Node newReturn(Node result) { + return new ReturnNode(currentBlock(), readCurrentSideEffect(), result); + } + + public Node newConstInt(int value) { + // always move const into start block, this allows better deduplication + // and resultingly in better value numbering + return this.optimizer.transform(new ConstIntNode(this.graph.startBlock(), value)); + } + + public Node newSideEffectProj(Node node) { + return new ProjNode(currentBlock(), node, ProjNode.SimpleProjectionInfo.SIDE_EFFECT); + } + + public Node newResultProj(Node node) { + return new ProjNode(currentBlock(), node, ProjNode.SimpleProjectionInfo.RESULT); + } + + public Block currentBlock() { + return this.currentBlock; + } + + public Phi newPhi() { + // don't transform phi directly, it is not ready yet + return new Phi(currentBlock()); + } + + public IrGraph graph() { + return this.graph; + } + + void writeVariable(Name variable, Block block, Node value) { + this.currentDef.computeIfAbsent(variable, _ -> new HashMap<>()).put(block, value); + } + + Node readVariable(Name variable, Block block) { + Node node = this.currentDef.getOrDefault(variable, Map.of()).get(block); + if (node != null) { + return node; + } + return readVariableRecursive(variable, block); + } + + + private Node readVariableRecursive(Name variable, Block block) { + Node val; + if (!this.sealedBlocks.contains(block)) { + val = newPhi(); + this.incompletePhis.computeIfAbsent(block, _ -> new 
HashMap<>()).put(variable, (Phi) val); + } else if (block.predecessors().size() == 1) { + val = readVariable(variable, block.predecessors().getFirst().block()); + } else { + val = newPhi(); + writeVariable(variable, block, val); + val = addPhiOperands(variable, (Phi) val); + } + writeVariable(variable, block, val); + return val; + } + + Node addPhiOperands(Name variable, Phi phi) { + for (Node pred : phi.block().predecessors()) { + phi.appendOperand(readVariable(variable, pred.block())); + } + return tryRemoveTrivialPhi(phi); + } + + Node tryRemoveTrivialPhi(Phi phi) { + // TODO: the paper shows how to remove trivial phis. + // as this is not a problem in Lab 1 and it is just + // a simplification, we recommend to implement this + // part yourself. + return phi; + } + + void sealBlock(Block block) { + for (Map.Entry entry : this.incompletePhis.getOrDefault(block, Map.of()).entrySet()) { + addPhiOperands(entry.getKey(), entry.getValue()); + } + this.sealedBlocks.add(block); + } + + public void writeCurrentSideEffect(Node node) { + writeSideEffect(currentBlock(), node); + } + + private void writeSideEffect(Block block, Node node) { + this.currentSideEffect.put(block, node); + } + + public Node readCurrentSideEffect() { + return readSideEffect(currentBlock()); + } + + private Node readSideEffect(Block block) { + Node node = this.currentSideEffect.get(block); + if (node != null) { + return node; + } + return readSideEffectRecursive(block); + } + + private Node readSideEffectRecursive(Block block) { + Node val; + if (!this.sealedBlocks.contains(block)) { + val = newPhi(); + Phi old = this.incompleteSideEffectPhis.put(block, (Phi) val); + assert old == null : "double readSideEffectRecursive for " + block; + } else if (block.predecessors().size() == 1) { + val = readSideEffect(block.predecessors().getFirst().block()); + } else { + val = newPhi(); + writeSideEffect(block, val); + val = addPhiOperands((Phi) val); + } + writeSideEffect(block, val); + return val; + } + + Node 
addPhiOperands(Phi phi) {
+        for (Node pred : phi.block().predecessors()) {
+            phi.appendOperand(readSideEffect(pred.block()));
+        }
+        return tryRemoveTrivialPhi(phi);
+    }
+
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/IrGraph.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/IrGraph.java
new file mode 100644
index 0000000..bcf391b
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/IrGraph.java
@@ -0,0 +1,74 @@
+package edu.kit.kastel.vads.compiler.ir;
+
+import edu.kit.kastel.vads.compiler.ir.node.Block;
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.SequencedSet;
+import java.util.Set;
+
+/// A single IR graph, identified by a name, together with its start and end [Block].
+///
+/// Nodes only store their predecessors (inputs); this class keeps the reverse
+/// direction so that the users of a node can be looked up.
+public class IrGraph {
+    /// Reverse edges: maps a node (by identity) to the nodes that use it as an input,
+    /// in registration order.
+    private final Map<Node, SequencedSet<Node>> successors = new IdentityHashMap<>();
+    private final Block startBlock;
+    private final Block endBlock;
+    private final String name;
+
+    /// Creates an empty graph with fresh start and end blocks.
+    ///
+    /// @param name the name of this graph
+    public IrGraph(String name) {
+        this.name = name;
+        this.startBlock = new Block(this);
+        this.endBlock = new Block(this);
+    }
+
+    /// Records that `successor` has `node` as one of its inputs.
+    public void registerSuccessor(Node node, Node successor) {
+        this.successors.computeIfAbsent(node, _ -> new LinkedHashSet<>()).add(successor);
+    }
+
+    /// Forgets that `oldSuccessor` has `node` as one of its inputs.
+    /// Does nothing if no successor was ever registered for `node`.
+    public void removeSuccessor(Node node, Node oldSuccessor) {
+        SequencedSet<Node> registered = this.successors.get(node);
+        if (registered != null) {
+            registered.remove(oldSuccessor);
+        }
+    }
+
+    /// {@return the set of nodes that have the given node as one of their inputs}
+    ///
+    /// The result is an unmodifiable snapshot that iterates in registration order.
+    public Set<Node> successors(Node node) {
+        SequencedSet<Node> registered = this.successors.get(node);
+        if (registered == null) {
+            return Set.of();
+        }
+        // Set.copyOf would drop the insertion order the LinkedHashSet maintains
+        return Collections.unmodifiableSet(new LinkedHashSet<>(registered));
+    }
+
+    /// {@return the block this graph starts with}
+    public Block startBlock() {
+        return this.startBlock;
+    }
+
+    /// {@return the end block of this graph}
+    public Block endBlock() {
+        return this.endBlock;
+    }
+
+    /// {@return the name of this graph}
+    public String name() {
+        return name;
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/SsaTranslation.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/SsaTranslation.java
new file mode 100644
index 0000000..dac150f
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/SsaTranslation.java
@@ -0,0 +1,242 @@
+package edu.kit.kastel.vads.compiler.ir;
+
+import edu.kit.kastel.vads.compiler.ir.node.Block;
+import edu.kit.kastel.vads.compiler.ir.node.DivNode;
+import edu.kit.kastel.vads.compiler.ir.node.ModNode;
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+import edu.kit.kastel.vads.compiler.ir.optimize.Optimizer;
+import edu.kit.kastel.vads.compiler.ir.util.DebugInfo;
+import edu.kit.kastel.vads.compiler.ir.util.DebugInfoHelper;
+import edu.kit.kastel.vads.compiler.parser.ast.AssignmentTree;
+import edu.kit.kastel.vads.compiler.parser.ast.BinaryOperationTree;
+import edu.kit.kastel.vads.compiler.parser.ast.BlockTree;
+import edu.kit.kastel.vads.compiler.parser.ast.DeclarationTree;
+import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree;
+import edu.kit.kastel.vads.compiler.parser.ast.IdentExpressionTree;
+import edu.kit.kastel.vads.compiler.parser.ast.LValueIdentTree;
+import edu.kit.kastel.vads.compiler.parser.ast.LiteralTree;
+import edu.kit.kastel.vads.compiler.parser.ast.NameTree;
+import edu.kit.kastel.vads.compiler.parser.ast.NegateTree;
+import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree;
+import edu.kit.kastel.vads.compiler.parser.ast.ReturnTree;
+import edu.kit.kastel.vads.compiler.parser.ast.StatementTree;
+import edu.kit.kastel.vads.compiler.parser.ast.Tree;
+import edu.kit.kastel.vads.compiler.parser.ast.TypeTree;
+import edu.kit.kastel.vads.compiler.parser.symbol.Name;
+import edu.kit.kastel.vads.compiler.parser.visitor.Visitor;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Optional;
+import java.util.function.BinaryOperator;
+
+/// SSA translation as described in
+/// [`Simple and Efficient Construction of Static Single Assignment Form`](https://compilers.cs.uni-saarland.de/papers/bbhlmz13cc.pdf).
+///
+/// This implementation also tracks side effect edges that can be used to avoid reordering of operations that cannot be
+/// reordered.
+///
+/// We recommend to read the paper to better understand the mechanics implemented here.
+public class SsaTranslation {
+    private final FunctionTree function;
+    private final GraphConstructor constructor;
+
+    /// Prepares the translation of a single function.
+    ///
+    /// @param function the function to translate
+    /// @param optimizer handed to the [GraphConstructor] that creates the nodes
+    public SsaTranslation(FunctionTree function, Optimizer optimizer) {
+        this.function = function;
+        this.constructor = new GraphConstructor(optimizer, function.name().name().asString());
+    }
+
+    /// Visits the AST of the function and builds the IR graph for it.
+    ///
+    /// @return the graph of the underlying [GraphConstructor]
+    public IrGraph translate() {
+        var visitor = new SsaTranslationVisitor();
+        this.function.accept(visitor, this);
+        return this.constructor.graph();
+    }
+
+    private void writeVariable(Name variable, Block block, Node value) {
+        this.constructor.writeVariable(variable, block, value);
+    }
+
+    private Node readVariable(Name variable, Block block) {
+        return this.constructor.readVariable(variable, block);
+    }
+
+    private Block currentBlock() {
+        return this.constructor.currentBlock();
+    }
+
+    /// Visitor creating the IR nodes. Expressions yield the node holding their value,
+    /// statements yield `NOT_AN_EXPRESSION`.
+    private static class SsaTranslationVisitor implements Visitor<SsaTranslation, Optional<Node>> {
+
+        @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
+        private static final Optional<Node> NOT_AN_EXPRESSION = Optional.empty();
+
+        /// Debug infos that were current before the spans pushed so far.
+        private final Deque<DebugInfo> debugStack = new ArrayDeque<>();
+
+        /// Makes the span of `tree` the current debug info and remembers the previous one.
+        private void pushSpan(Tree tree) {
+            this.debugStack.push(DebugInfoHelper.getDebugInfo());
+            DebugInfoHelper.setDebugInfo(new DebugInfo.SourceInfo(tree.span()));
+        }
+
+        /// Restores the debug info that was current before the matching `pushSpan`.
+        private void popSpan() {
+            DebugInfoHelper.setDebugInfo(this.debugStack.pop());
+        }
+
+        @Override
+        public Optional<Node> visit(AssignmentTree assignmentTree, SsaTranslation data) {
+            pushSpan(assignmentTree);
+            // compound assignments are desugared into the binary operation; null means plain assignment
+            BinaryOperator<Node> desugar = switch (assignmentTree.operator().type()) {
+                case ASSIGN_MINUS -> data.constructor::newSub;
+                case ASSIGN_PLUS -> data.constructor::newAdd;
+                case ASSIGN_MUL -> data.constructor::newMul;
+                case ASSIGN_DIV -> (lhs, rhs) -> projResultDivMod(data, data.constructor.newDiv(lhs, rhs));
+                case ASSIGN_MOD -> (lhs, rhs) -> projResultDivMod(data, data.constructor.newMod(lhs, rhs));
+                case ASSIGN -> null;
+                default ->
+                    throw new IllegalArgumentException("not an assignment operator " + assignmentTree.operator());
+            };
+
+            switch (assignmentTree.lValue()) {
+                case LValueIdentTree(var name) -> {
+                    Node rhs = assignmentTree.expression().accept(this, data).orElseThrow();
+                    if (desugar != null) {
+                        rhs = desugar.apply(data.readVariable(name.name(), data.currentBlock()), rhs);
+                    }
+                    data.writeVariable(name.name(), data.currentBlock(), rhs);
+                }
+            }
+            popSpan();
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(BinaryOperationTree binaryOperationTree, SsaTranslation data) {
+            pushSpan(binaryOperationTree);
+            Node lhs = binaryOperationTree.lhs().accept(this, data).orElseThrow();
+            Node rhs = binaryOperationTree.rhs().accept(this, data).orElseThrow();
+            Node res = switch (binaryOperationTree.operatorType()) {
+                case MINUS -> data.constructor.newSub(lhs, rhs);
+                case PLUS -> data.constructor.newAdd(lhs, rhs);
+                case MUL -> data.constructor.newMul(lhs, rhs);
+                case DIV -> projResultDivMod(data, data.constructor.newDiv(lhs, rhs));
+                case MOD -> projResultDivMod(data, data.constructor.newMod(lhs, rhs));
+                default ->
+                    throw new IllegalArgumentException("not a binary expression operator " + binaryOperationTree.operatorType());
+            };
+            popSpan();
+            return Optional.of(res);
+        }
+
+        @Override
+        public Optional<Node> visit(BlockTree blockTree, SsaTranslation data) {
+            pushSpan(blockTree);
+            for (StatementTree statement : blockTree.statements()) {
+                statement.accept(this, data);
+            }
+            popSpan();
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(DeclarationTree declarationTree, SsaTranslation data) {
+            pushSpan(declarationTree);
+            if (declarationTree.initializer() != null) {
+                Node rhs = declarationTree.initializer().accept(this, data).orElseThrow();
+                data.writeVariable(declarationTree.name().name(), data.currentBlock(), rhs);
+            }
+            popSpan();
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(FunctionTree functionTree, SsaTranslation data) {
+            pushSpan(functionTree);
+            Node start = data.constructor.newStart();
+            data.constructor.writeCurrentSideEffect(data.constructor.newSideEffectProj(start));
+            functionTree.body().accept(this, data);
+            popSpan();
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(IdentExpressionTree identExpressionTree, SsaTranslation data) {
+            pushSpan(identExpressionTree);
+            Node value = data.readVariable(identExpressionTree.name().name(), data.currentBlock());
+            popSpan();
+            return Optional.of(value);
+        }
+
+        @Override
+        public Optional<Node> visit(LiteralTree literalTree, SsaTranslation data) {
+            pushSpan(literalTree);
+            Node node = data.constructor.newConstInt((int) literalTree.value());
+            popSpan();
+            return Optional.of(node);
+        }
+
+        @Override
+        public Optional<Node> visit(LValueIdentTree lValueIdentTree, SsaTranslation data) {
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(NameTree nameTree, SsaTranslation data) {
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(NegateTree negateTree, SsaTranslation data) {
+            pushSpan(negateTree);
+            Node node = negateTree.expression().accept(this, data).orElseThrow();
+            // -x is modelled as 0 - x
+            Node res = data.constructor.newSub(data.constructor.newConstInt(0), node);
+            popSpan();
+            return Optional.of(res);
+        }
+
+        @Override
+        public Optional<Node> visit(ProgramTree programTree, SsaTranslation data) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public Optional<Node> visit(ReturnTree returnTree, SsaTranslation data) {
+            pushSpan(returnTree);
+            Node node = returnTree.expression().accept(this, data).orElseThrow();
+            Node ret = data.constructor.newReturn(node);
+            // every return becomes a predecessor of the end block
+            data.constructor.graph().endBlock().addPredecessor(ret);
+            popSpan();
+            return NOT_AN_EXPRESSION;
+        }
+
+        @Override
+        public Optional<Node> visit(TypeTree typeTree, SsaTranslation data) {
+            throw new UnsupportedOperationException();
+        }
+
+        private
Node projResultDivMod(SsaTranslation data, Node divMod) {
+            // make sure we actually have a div or a mod, as optimizations could
+            // have changed it to something else already
+            if (!(divMod instanceof DivNode || divMod instanceof ModNode)) {
+                return divMod;
+            }
+            Node projSideEffect = data.constructor.newSideEffectProj(divMod);
+            data.constructor.writeCurrentSideEffect(projSideEffect);
+            return data.constructor.newResultProj(divMod);
+        }
+    }
+
+
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/AddNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/AddNode.java
new file mode 100644
index 0000000..57bea90
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/AddNode.java
@@ -0,0 +1,19 @@
+package edu.kit.kastel.vads.compiler.ir.node;
+
+public final class AddNode extends BinaryOperationNode {
+
+    public AddNode(Block block, Node left, Node right) {
+        super(block, left, right);
+    }
+
+    @SuppressWarnings("EqualsDoesntCheckParameterClass") // we do, but not here
+    @Override
+    public boolean equals(Object obj) {
+        return commutativeEquals(this, obj);
+    }
+
+    @Override
+    public int hashCode() {
+        return commutativeHashCode(this);
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/BinaryOperationNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/BinaryOperationNode.java
new file mode 100644
index 0000000..3715618
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/BinaryOperationNode.java
@@ -0,0 +1,70 @@
+package edu.kit.kastel.vads.compiler.ir.node;
+
+/// Base class for all nodes that take a left and a right operand.
+///
+/// Unless a subclass overrides it, equality is structural: two nodes are equal if they
+/// are of the same class, belong to the same block and have identical operands.
+public sealed abstract class BinaryOperationNode extends Node permits AddNode, DivNode, ModNode, MulNode, SubNode {
+    /// Predecessor index of the left operand.
+    public static final int LEFT = 0;
+    /// Predecessor index of the right operand.
+    public static final int RIGHT = 1;
+
+    protected BinaryOperationNode(Block block, Node left, Node right) {
+        super(block, left, right);
+    }
+
+    /// Variant for operations that additionally depend on a side effect,
+    /// which becomes the third predecessor.
+    protected BinaryOperationNode(Block block, Node left, Node right, Node sideEffect) {
+        super(block, left, right, sideEffect);
+    }
+
+    /// Hash code for commutative operations, consistent with `commutativeEquals`:
+    /// swapping the operands does not change the result.
+    protected static int commutativeHashCode(BinaryOperationNode node) {
+        int h = node.block().hashCode();
+        // commutative operation: we want h(op(x, y)) == h(op(y, x))
+        h += 31 * (node.predecessor(LEFT).hashCode() ^ node.predecessor(RIGHT).hashCode());
+        return h;
+    }
+
+    /// Equality for commutative operations: `op(x, y)` equals `op(y, x)`.
+    /// Both nodes must be of the same class and in the same block; operands are
+    /// compared by identity.
+    protected static boolean commutativeEquals(BinaryOperationNode a, Object bObj) {
+        if (!(bObj instanceof BinaryOperationNode b)) {
+            return false;
+        }
+        if (a.getClass() != b.getClass()) {
+            return false;
+        }
+        // commutativeHashCode includes the block, so equality has to depend on it as well;
+        // otherwise equal nodes could end up with different hash codes
+        if (a.block() != b.block()) {
+            return false;
+        }
+        if (a.predecessor(LEFT) == b.predecessor(LEFT) && a.predecessor(RIGHT) == b.predecessor(RIGHT)) {
+            return true;
+        }
+        // commutative operation: op(x, y) == op(y, x)
+        return a.predecessor(LEFT) == b.predecessor(RIGHT) && a.predecessor(RIGHT) == b.predecessor(LEFT);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof BinaryOperationNode binOp)) {
+            return false;
+        }
+        // nodes of different blocks are never equal, which keeps value numbering local
+        return obj.getClass() == this.getClass()
+            && this.block() == binOp.block()
+            && this.predecessor(LEFT) == binOp.predecessor(LEFT)
+            && this.predecessor(RIGHT) == binOp.predecessor(RIGHT);
+    }
+
+    @Override
+    public int hashCode() {
+        return (this.predecessor(LEFT).hashCode() * 31 + this.predecessor(RIGHT).hashCode()) ^ this.getClass().hashCode();
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Block.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Block.java
new file mode 100644
index 0000000..e168cdc
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Block.java
@@ -0,0 +1,11 @@
+package edu.kit.kastel.vads.compiler.ir.node;
+
+import edu.kit.kastel.vads.compiler.ir.IrGraph;
+
+public final class Block extends Node {
+
+    public Block(IrGraph graph) {
+        super(graph);
+    }
+
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ConstIntNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ConstIntNode.java
new file mode 100644
index 0000000..a5426a9
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ConstIntNode.java
@@ -0,0 +1,32 @@
+package
edu.kit.kastel.vads.compiler.ir.node; + +public final class ConstIntNode extends Node { + private final int value; + + public ConstIntNode(Block block, int value) { + super(block); + this.value = value; + } + + public int value() { + return this.value; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof ConstIntNode c) { + return this.block() == c.block() && c.value == this.value; + } + return false; + } + + @Override + public int hashCode() { + return this.value; + } + + @Override + protected String info() { + return "[" + this.value + "]"; + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/DivNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/DivNode.java new file mode 100644 index 0000000..a4a6ca7 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/DivNode.java @@ -0,0 +1,15 @@ +package edu.kit.kastel.vads.compiler.ir.node; + +public final class DivNode extends BinaryOperationNode { + public static final int SIDE_EFFECT = 2; + public DivNode(Block block, Node left, Node right, Node sideEffect) { + super(block, left, right, sideEffect); + } + + @Override + public boolean equals(Object obj) { + // side effect, must be very careful with value numbering. + // this is the most conservative approach + return obj == this; + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ModNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ModNode.java new file mode 100644 index 0000000..ccfadb7 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ModNode.java @@ -0,0 +1,15 @@ +package edu.kit.kastel.vads.compiler.ir.node; + +public final class ModNode extends BinaryOperationNode { + public static final int SIDE_EFFECT = 2; + public ModNode(Block block, Node left, Node right, Node sideEffect) { + super(block, left, right, sideEffect); + } + + @Override + public boolean equals(Object obj) { + // side effect, must be very careful with value numbering. 
+        // this is the most conservative approach
+        return obj == this;
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/MulNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/MulNode.java
new file mode 100644
index 0000000..7915233
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/MulNode.java
@@ -0,0 +1,18 @@
+package edu.kit.kastel.vads.compiler.ir.node;
+
+public final class MulNode extends BinaryOperationNode {
+    public MulNode(Block block, Node left, Node right) {
+        super(block, left, right);
+    }
+
+    @SuppressWarnings("EqualsDoesntCheckParameterClass") // we do, but not here
+    @Override
+    public boolean equals(Object obj) {
+        return commutativeEquals(this, obj);
+    }
+
+    @Override
+    public int hashCode() {
+        return commutativeHashCode(this);
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Node.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Node.java
new file mode 100644
index 0000000..cd54aad
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Node.java
@@ -0,0 +1,101 @@
+package edu.kit.kastel.vads.compiler.ir.node;
+
+import edu.kit.kastel.vads.compiler.ir.util.DebugInfo;
+import edu.kit.kastel.vads.compiler.ir.IrGraph;
+import edu.kit.kastel.vads.compiler.ir.util.DebugInfoHelper;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/// The base class for all nodes.
+///
+/// Every node belongs to a [Block] and keeps an ordered list of predecessors (its inputs).
+/// The reverse direction is tracked by the [IrGraph].
+public sealed abstract class Node permits BinaryOperationNode, Block, ConstIntNode, Phi, ProjNode, ReturnNode, StartNode {
+    private final IrGraph graph;
+    private final Block block;
+    private final List<Node> predecessors = new ArrayList<>();
+    private final DebugInfo debugInfo;
+
+    /// Creates a node in `block` with the given predecessors and registers it as
+    /// successor of each of them. The current debug info is captured.
+    protected Node(Block block, Node... predecessors) {
+        this.graph = block.graph();
+        this.block = block;
+        this.predecessors.addAll(List.of(predecessors));
+        for (Node predecessor : predecessors) {
+            graph.registerSuccessor(predecessor, this);
+        }
+        this.debugInfo = DebugInfoHelper.getDebugInfo();
+    }
+
+    /// Constructor for [Block] only: a block is its own block and carries no debug info.
+    protected Node(IrGraph graph) {
+        assert this.getClass() == Block.class : "must be used by Block only";
+        this.graph = graph;
+        this.block = (Block) this;
+        this.debugInfo = DebugInfo.NoInfo.INSTANCE;
+    }
+
+    public final IrGraph graph() {
+        return this.graph;
+    }
+
+    public final Block block() {
+        return this.block;
+    }
+
+    /// {@return an unmodifiable copy of the predecessors}
+    public final List<Node> predecessors() {
+        return List.copyOf(this.predecessors);
+    }
+
+    /// Replaces the predecessor at `idx` with `node` and updates the successor
+    /// information of the graph accordingly.
+    public final void setPredecessor(int idx, Node node) {
+        Node old = this.predecessors.get(idx);
+        // the old predecessor may still be used at another index,
+        // in which case this node has to stay registered as its successor
+        if (!isPredecessorElsewhere(old, idx)) {
+            this.graph.removeSuccessor(old, this);
+        }
+        this.predecessors.set(idx, node);
+        this.graph.registerSuccessor(node, this);
+    }
+
+    /// {@return whether `candidate` is a predecessor at any index other than `idx`}
+    private boolean isPredecessorElsewhere(Node candidate, int idx) {
+        for (int i = 0; i < this.predecessors.size(); i++) {
+            if (i != idx && this.predecessors.get(i) == candidate) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /// Appends `node` as the last predecessor and registers this node as its successor.
+    public final void addPredecessor(Node node) {
+        this.predecessors.add(node);
+        this.graph.registerSuccessor(node, this);
+    }
+
+    /// {@return the predecessor at index `idx`}
+    public final Node predecessor(int idx) {
+        return this.predecessors.get(idx);
+    }
+
+    @Override
+    public final String toString() {
+        return (this.getClass().getSimpleName().replace("Node", "") + " " + info()).stripTrailing();
+    }
+
+    /// {@return additional text appended to the node name in [#toString()]}
+    protected String info() {
+        return "";
+    }
+
+    /// {@return the debug info assigned to this node on construction}
+    public DebugInfo debugInfo() {
+        return debugInfo;
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Phi.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Phi.java
new file mode 100644
index 0000000..2f183cf
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/Phi.java
@@ -0,0 +1,11 @@
+package edu.kit.kastel.vads.compiler.ir.node;
+
+public final class Phi extends Node {
+    public Phi(Block block) {
+        super(block);
+    }
+
+    public void appendOperand(Node node) {
+        addPredecessor(node);
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ProjNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ProjNode.java
new file
mode 100644 index 0000000..041c2b5 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ProjNode.java @@ -0,0 +1,24 @@ +package edu.kit.kastel.vads.compiler.ir.node; + +public final class ProjNode extends Node { + public static final int IN = 0; + private final ProjectionInfo projectionInfo; + + public ProjNode(Block block, Node in, ProjectionInfo projectionInfo) { + super(block, in); + this.projectionInfo = projectionInfo; + } + + @Override + protected String info() { + return this.projectionInfo.toString(); + } + + public sealed interface ProjectionInfo { + + } + + public enum SimpleProjectionInfo implements ProjectionInfo { + RESULT, SIDE_EFFECT + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ReturnNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ReturnNode.java new file mode 100644 index 0000000..ced3252 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/ReturnNode.java @@ -0,0 +1,9 @@ +package edu.kit.kastel.vads.compiler.ir.node; + +public final class ReturnNode extends Node { + public static final int SIDE_EFFECT = 0; + public static final int RESULT = 1; + public ReturnNode(Block block, Node sideEffect, Node result) { + super(block, sideEffect, result); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/StartNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/StartNode.java new file mode 100644 index 0000000..9f0c16b --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/StartNode.java @@ -0,0 +1,7 @@ +package edu.kit.kastel.vads.compiler.ir.node; + +public final class StartNode extends Node { + public StartNode(Block block) { + super(block); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/node/SubNode.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/SubNode.java new file mode 100644 index 0000000..c0ede85 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/node/SubNode.java @@ -0,0 +1,7 @@ 
+package edu.kit.kastel.vads.compiler.ir.node;
+
+public final class SubNode extends BinaryOperationNode {
+    public SubNode(Block block, Node left, Node right) {
+        super(block, left, right);
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/optimize/LocalValueNumbering.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/optimize/LocalValueNumbering.java
new file mode 100644
index 0000000..2f98b71
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/optimize/LocalValueNumbering.java
@@ -0,0 +1,25 @@
+package edu.kit.kastel.vads.compiler.ir.optimize;
+
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/// Replaces a node with an equal node that was seen earlier, if there is one.
+///
+/// This depends on [Node#equals(java.lang.Object)] and [Node#hashCode()] methods.
+/// As long as they take the block into account, it is only local, but replacement
+/// is extremely simple.
+/// When using classes like [HashMap] or [java.util.HashSet] without this optimization,
+/// the [Node#equals(java.lang.Object)] and [Node#hashCode()] methods must be adjusted.
+public class LocalValueNumbering implements Optimizer {
+    /// Maps every distinct node seen so far to itself, so the first of several equal nodes can be looked up.
+    private final Map<Node, Node> knownNodes = new HashMap<>();
+
+    /// {@return the first node passed to this method that is equal to `node`;
+    /// this is `node` itself if no equal node was seen before}
+    @Override
+    public Node transform(Node node) {
+        return this.knownNodes.computeIfAbsent(node, n -> n);
+    }
+}
diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/optimize/Optimizer.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/optimize/Optimizer.java
new file mode 100644
index 0000000..d561a8f
--- /dev/null
+++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/optimize/Optimizer.java
@@ -0,0 +1,9 @@
+package edu.kit.kastel.vads.compiler.ir.optimize;
+
+import edu.kit.kastel.vads.compiler.ir.node.Node;
+
+/// An interface that allows replacing a node with a more optimal one.
+public interface Optimizer { + + Node transform(Node node); +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/util/DebugInfo.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/DebugInfo.java new file mode 100644 index 0000000..a2b93a9 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/DebugInfo.java @@ -0,0 +1,12 @@ +package edu.kit.kastel.vads.compiler.ir.util; + +import edu.kit.kastel.vads.compiler.Span; + +/// Provides information to ease debugging +public sealed interface DebugInfo { + enum NoInfo implements DebugInfo { + INSTANCE + } + + record SourceInfo(Span span) implements DebugInfo {} +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/util/DebugInfoHelper.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/DebugInfoHelper.java new file mode 100644 index 0000000..b01c573 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/DebugInfoHelper.java @@ -0,0 +1,16 @@ +package edu.kit.kastel.vads.compiler.ir.util; + +/// This is a dirty trick as we don't have Scoped Values. +/// It allows tracking debug info without having to pass it +/// down all the layers. 
+public final class DebugInfoHelper { + private static DebugInfo debugInfo = DebugInfo.NoInfo.INSTANCE; + + public static void setDebugInfo(DebugInfo debugInfo) { + DebugInfoHelper.debugInfo = debugInfo; + } + + public static DebugInfo getDebugInfo() { + return debugInfo; + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/util/GraphVizPrinter.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/GraphVizPrinter.java new file mode 100644 index 0000000..4c4a084 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/GraphVizPrinter.java @@ -0,0 +1,144 @@ +package edu.kit.kastel.vads.compiler.ir.util; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.ir.IrGraph; +import edu.kit.kastel.vads.compiler.ir.node.Block; +import edu.kit.kastel.vads.compiler.ir.node.Node; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/// Outputs a DOT format string to visualize an [IrGraph]. 
+public class GraphVizPrinter { + private final Map> clusters = new HashMap<>(); + private final List edges = new ArrayList<>(); + private final Map ids = new HashMap<>(); + private final StringBuilder builder = new StringBuilder(); + private final IrGraph graph; + private int counter = 0; + + public GraphVizPrinter(IrGraph graph) { + this.graph = graph; + } + + public static String print(IrGraph graph) { + GraphVizPrinter printer = new GraphVizPrinter(graph); + printer.prepare(graph.endBlock(), new HashSet<>()); + printer.print(); + return printer.builder.toString(); + } + + private void prepare(Node node, Set seen) { + if (!seen.add(node)) { + return; + } + + if (!(node instanceof Block)) { + this.clusters.computeIfAbsent(node.block(), _ -> Collections.newSetFromMap(new IdentityHashMap<>())) + .add(node); + } + int idx = 0; + for (Node predecessor : node.predecessors()) { + this.edges.add(new Edge(predecessor, node, idx++)); + prepare(predecessor, seen); + } + if (node == this.graph.endBlock()) { + this.clusters.put(this.graph.endBlock(), Set.of()); + } + } + + private void print() { + this.builder.append("digraph \"") + .append(this.graph.name()) + .append("\"") + .append(""" + { + compound=true; + layout=dot; + node [shape=box]; + splines=ortho; + overlap=false; + + """); + + this.clusters.forEach((block, nodes) -> { + this.builder.append(" subgraph cluster_") + .append(idFor(block)) + .append(" {\n") + .repeat(" ", 8) + .append("c_").append(idFor(block)) + .append(" [width=0, height=0, fixedsize=true, style=invis];\n"); + if (block == this.graph.endBlock()) { + this.builder.repeat(" ", 8) + .append("label=End;\n"); + } + for (Node node : nodes) { + this.builder.repeat(" ", 8) + .append(idFor(node)) + .append(" [label=\"") + .append(labelFor(node)) + .append("\""); + if (node.debugInfo() instanceof DebugInfo.SourceInfo(Span span)) { + this.builder.append(", tooltip=\"") + .append("source span: ") + .append(span) + .append("\""); + } + 
this.builder.append("];\n"); + } + this.builder.append(" }\n\n"); + }); + + for (Edge edge : this.edges) { + this.builder.repeat(" ", 4) + .append(nameFor(edge.from())) + .append(" -> ") + .append(nameFor(edge.to())) + .append(" [") + .append("label=") + .append(edge.idx()); + + if (edge.from() instanceof Block b) { + this.builder.append(", ") + .append("ltail=") + .append("cluster_") + .append(idFor(b)); + } + if (edge.to() instanceof Block b) { + this.builder.append(", ") + .append("lhead=") + .append("cluster_") + .append(idFor(b)); + } + + this.builder.append("];\n"); + } + + this.builder.append("}"); + + } + + private int idFor(Node node) { + return this.ids.computeIfAbsent(node, _ -> this.counter++); + } + + private String nameFor(Node node) { + if (node instanceof Block) { + return "c_" + idFor(node); + } + return String.valueOf(idFor(node)); + } + + private String labelFor(Node node) { + return node.toString(); + } + + record Edge(Node from, Node to, int idx) { + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/ir/util/NodeSupport.java b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/NodeSupport.java new file mode 100644 index 0000000..586d5c2 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/ir/util/NodeSupport.java @@ -0,0 +1,18 @@ +package edu.kit.kastel.vads.compiler.ir.util; + +import edu.kit.kastel.vads.compiler.ir.node.Node; +import edu.kit.kastel.vads.compiler.ir.node.ProjNode; + +public final class NodeSupport { + private NodeSupport() { + + } + + public static Node predecessorSkipProj(Node node, int predIdx) { + Node pred = node.predecessor(predIdx); + if (pred instanceof ProjNode) { + return pred.predecessor(ProjNode.IN); + } + return pred; + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/ErrorToken.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/ErrorToken.java new file mode 100644 index 0000000..67f4cfb --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/ErrorToken.java 
@@ -0,0 +1,10 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public record ErrorToken(String value, Span span) implements Token { + @Override + public String asString() { + return value(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/Identifier.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Identifier.java new file mode 100644 index 0000000..70e3f49 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Identifier.java @@ -0,0 +1,10 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public record Identifier(String value, Span span) implements Token { + @Override + public String asString() { + return value(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/Keyword.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Keyword.java new file mode 100644 index 0000000..418686f --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Keyword.java @@ -0,0 +1,15 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public record Keyword(KeywordType type, Span span) implements Token { + @Override + public boolean isKeyword(KeywordType keywordType) { + return type() == keywordType; + } + + @Override + public String asString() { + return type().keyword(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/KeywordType.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/KeywordType.java new file mode 100644 index 0000000..1f64913 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/KeywordType.java @@ -0,0 +1,41 @@ +package edu.kit.kastel.vads.compiler.lexer; + +public enum KeywordType { + STRUCT("struct"), + IF("if"), + ELSE("else"), + WHILE("while"), + FOR("for"), + CONTINUE("continue"), + BREAK("break"), + RETURN("return"), + ASSERT("assert"), + TRUE("true"), + FALSE("false"), + NULL("NULL"), + PRINT("print"), + 
READ("read"), + ALLOC("alloc"), + ALLOC_ARRAY("alloc_array"), + INT("int"), + BOOL("bool"), + VOID("void"), + CHAR("char"), + STRING("string"), + ; + + private final String keyword; + + KeywordType(String keyword) { + this.keyword = keyword; + } + + public String keyword() { + return keyword; + } + + @Override + public String toString() { + return keyword(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/Lexer.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Lexer.java new file mode 100644 index 0000000..b6324bc --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Lexer.java @@ -0,0 +1,215 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Position; +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.lexer.Operator.OperatorType; +import edu.kit.kastel.vads.compiler.lexer.Separator.SeparatorType; +import org.jspecify.annotations.Nullable; + +import java.util.Optional; + +public class Lexer { + private final String source; + private int pos; + private int lineStart; + private int line; + + private Lexer(String source) { + this.source = source; + } + + public static Lexer forString(String source) { + return new Lexer(source); + } + + public Optional nextToken() { + ErrorToken error = skipWhitespace(); + if (error != null) { + return Optional.of(error); + } + if (this.pos >= this.source.length()) { + return Optional.empty(); + } + Token t = switch (peek()) { + case '(' -> separator(SeparatorType.PAREN_OPEN); + case ')' -> separator(SeparatorType.PAREN_CLOSE); + case '{' -> separator(SeparatorType.BRACE_OPEN); + case '}' -> separator(SeparatorType.BRACE_CLOSE); + case ';' -> separator(SeparatorType.SEMICOLON); + case '-' -> singleOrAssign(OperatorType.MINUS, OperatorType.ASSIGN_MINUS); + case '+' -> singleOrAssign(OperatorType.PLUS, OperatorType.ASSIGN_PLUS); + case '*' -> singleOrAssign(OperatorType.MUL, OperatorType.ASSIGN_MUL); + case '/' -> 
singleOrAssign(OperatorType.DIV, OperatorType.ASSIGN_DIV); + case '%' -> singleOrAssign(OperatorType.MOD, OperatorType.ASSIGN_MOD); + case '=' -> new Operator(OperatorType.ASSIGN, buildSpan(1)); + default -> { + if (isIdentifierChar(peek())) { + if (isNumeric(peek())) { + yield lexNumber(); + } + yield lexIdentifierOrKeyword(); + } + yield new ErrorToken(String.valueOf(peek()), buildSpan(1)); + } + }; + + return Optional.of(t); + } + + private @Nullable ErrorToken skipWhitespace() { + enum CommentType { + SINGLE_LINE, + MULTI_LINE + } + CommentType currentCommentType = null; + int multiLineCommentDepth = 0; + int commentStart = -1; + while (hasMore(0)) { + switch (peek()) { + case ' ', '\t' -> this.pos++; + case '\n', '\r' -> { + this.pos++; + this.lineStart = this.pos; + this.line++; + if (currentCommentType == CommentType.SINGLE_LINE) { + currentCommentType = null; + } + } + case '/' -> { + if (currentCommentType == CommentType.SINGLE_LINE) { + this.pos++; + continue; + } + if (hasMore(1)) { + if (peek(1) == '/' && currentCommentType == null) { + currentCommentType = CommentType.SINGLE_LINE; + } else if (peek(1) == '*') { + currentCommentType = CommentType.MULTI_LINE; + multiLineCommentDepth++; + } else { + return null; + } + commentStart = this.pos; + this.pos += 2; + continue; + } + // are we in a multi line comment of any depth? + if (multiLineCommentDepth > 0) { + this.pos++; + continue; + } + return null; + } + default -> { + if (currentCommentType == CommentType.MULTI_LINE) { + if (peek() == '*' && hasMore(1) && peek(1) == '/') { + this.pos += 2; + multiLineCommentDepth--; + currentCommentType = multiLineCommentDepth == 0 ? 
null : CommentType.MULTI_LINE; + } else { + this.pos++; + } + continue; + } else if (currentCommentType == CommentType.SINGLE_LINE) { + this.pos++; + continue; + } + return null; + } + } + } + if (!hasMore(0) && currentCommentType == CommentType.MULTI_LINE) { + return new ErrorToken(this.source.substring(commentStart), buildSpan(0)); + } + return null; + } + + private Separator separator(SeparatorType parenOpen) { + return new Separator(parenOpen, buildSpan(1)); + } + + private Token lexIdentifierOrKeyword() { + int off = 1; + while (hasMore(off) && isIdentifierChar(peek(off))) { + off++; + } + String id = this.source.substring(this.pos, this.pos + off); + // This is a naive solution. Using a better data structure (hashmap, trie) likely performs better. + for (KeywordType value : KeywordType.values()) { + if (value.keyword().equals(id)) { + return new Keyword(value, buildSpan(off)); + } + } + return new Identifier(id, buildSpan(off)); + } + + private Token lexNumber() { + if (isHexPrefix()) { + int off = 2; + while (hasMore(off) && isHex(peek(off))) { + off++; + } + if (off == 2) { + // 0x without any further hex digits + return new ErrorToken(this.source.substring(this.pos, this.pos + off), buildSpan(2)); + } + return new NumberLiteral(this.source.substring(this.pos, this.pos + off), 16, buildSpan(off)); + } + int off = 1; + while (hasMore(off) && isNumeric(peek(off))) { + off++; + } + if (peek() == '0' && off > 1) { + // leading zero is not allowed + return new ErrorToken(this.source.substring(this.pos, this.pos + off), buildSpan(off)); + } + return new NumberLiteral(this.source.substring(this.pos, this.pos + off), 10, buildSpan(off)); + } + + private boolean isHexPrefix() { + return peek() == '0' && hasMore(1) && (peek(1) == 'x' || peek(1) == 'X'); + } + + private boolean isIdentifierChar(char c) { + return c == '_' + || c >= 'a' && c <= 'z' + || c >= 'A' && c <= 'Z' + || c >= '0' && c <= '9'; + } + + private boolean isNumeric(char c) { + return c >= '0' && c <= 
'9'; + } + + private boolean isHex(char c) { + return isNumeric(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + } + + private Token singleOrAssign(OperatorType single, OperatorType assign) { + if (hasMore(1) && peek(1) == '=') { + return new Operator(assign, buildSpan(2)); + } + return new Operator(single, buildSpan(1)); + } + + private Span buildSpan(int proceed) { + int start = this.pos; + this.pos += proceed; + Position.SimplePosition s = new Position.SimplePosition(this.line, start - this.lineStart); + Position.SimplePosition e = new Position.SimplePosition(this.line, start - this.lineStart + proceed); + return new Span.SimpleSpan(s, e); + } + + private char peek() { + return this.source.charAt(this.pos); + } + + private boolean hasMore(int offset) { + return this.pos + offset < this.source.length(); + } + + private char peek(int offset) { + return this.source.charAt(this.pos + offset); + } + +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/NumberLiteral.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/NumberLiteral.java new file mode 100644 index 0000000..2f333ed --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/NumberLiteral.java @@ -0,0 +1,10 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public record NumberLiteral(String value, int base, Span span) implements Token { + @Override + public String asString() { + return value(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/Operator.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Operator.java new file mode 100644 index 0000000..eabd054 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Operator.java @@ -0,0 +1,42 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public record Operator(OperatorType type, Span span) implements Token { + + @Override + public boolean isOperator(OperatorType operatorType) { + return 
type() == operatorType; + } + + @Override + public String asString() { + return type().toString(); + } + + public enum OperatorType { + ASSIGN_MINUS("-="), + MINUS("-"), + ASSIGN_PLUS("+="), + PLUS("+"), + MUL("*"), + ASSIGN_MUL("*="), + ASSIGN_DIV("/="), + DIV("/"), + ASSIGN_MOD("%="), + MOD("%"), + ASSIGN("="), + ; + + private final String value; + + OperatorType(String value) { + this.value = value; + } + + @Override + public String toString() { + return this.value; + } + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/Separator.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Separator.java new file mode 100644 index 0000000..61c49ed --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Separator.java @@ -0,0 +1,35 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public record Separator(SeparatorType type, Span span) implements Token { + + @Override + public boolean isSeparator(SeparatorType separatorType) { + return type() == separatorType; + } + + @Override + public String asString() { + return type().toString(); + } + + public enum SeparatorType { + PAREN_OPEN("("), + PAREN_CLOSE(")"), + BRACE_OPEN("{"), + BRACE_CLOSE("}"), + SEMICOLON(";"); + + private final String value; + + SeparatorType(String value) { + this.value = value; + } + + @Override + public String toString() { + return this.value; + } + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/lexer/Token.java b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Token.java new file mode 100644 index 0000000..1a8546a --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/lexer/Token.java @@ -0,0 +1,22 @@ +package edu.kit.kastel.vads.compiler.lexer; + +import edu.kit.kastel.vads.compiler.Span; + +public sealed interface Token permits ErrorToken, Identifier, Keyword, NumberLiteral, Operator, Separator { + + Span span(); + + default boolean isKeyword(KeywordType keywordType) { + return false; + } + + 
default boolean isOperator(Operator.OperatorType operatorType) { + return false; + } + + default boolean isSeparator(Separator.SeparatorType separatorType) { + return false; + } + + String asString(); +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ParseException.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ParseException.java new file mode 100644 index 0000000..7d03dfd --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ParseException.java @@ -0,0 +1,7 @@ +package edu.kit.kastel.vads.compiler.parser; + +public class ParseException extends RuntimeException { + public ParseException(String message) { + super(message); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/Parser.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/Parser.java new file mode 100644 index 0000000..4b09d47 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/Parser.java @@ -0,0 +1,201 @@ +package edu.kit.kastel.vads.compiler.parser; + +import edu.kit.kastel.vads.compiler.lexer.Identifier; +import edu.kit.kastel.vads.compiler.lexer.Keyword; +import edu.kit.kastel.vads.compiler.lexer.KeywordType; +import edu.kit.kastel.vads.compiler.lexer.NumberLiteral; +import edu.kit.kastel.vads.compiler.lexer.Operator; +import edu.kit.kastel.vads.compiler.lexer.Operator.OperatorType; +import edu.kit.kastel.vads.compiler.lexer.Separator; +import edu.kit.kastel.vads.compiler.lexer.Separator.SeparatorType; +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.lexer.Token; +import edu.kit.kastel.vads.compiler.parser.ast.AssignmentTree; +import edu.kit.kastel.vads.compiler.parser.ast.BinaryOperationTree; +import edu.kit.kastel.vads.compiler.parser.ast.BlockTree; +import edu.kit.kastel.vads.compiler.parser.ast.DeclarationTree; +import edu.kit.kastel.vads.compiler.parser.ast.ExpressionTree; +import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree; +import 
edu.kit.kastel.vads.compiler.parser.ast.IdentExpressionTree; +import edu.kit.kastel.vads.compiler.parser.ast.LValueIdentTree; +import edu.kit.kastel.vads.compiler.parser.ast.LValueTree; +import edu.kit.kastel.vads.compiler.parser.ast.LiteralTree; +import edu.kit.kastel.vads.compiler.parser.ast.NameTree; +import edu.kit.kastel.vads.compiler.parser.ast.NegateTree; +import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree; +import edu.kit.kastel.vads.compiler.parser.ast.ReturnTree; +import edu.kit.kastel.vads.compiler.parser.ast.StatementTree; +import edu.kit.kastel.vads.compiler.parser.ast.TypeTree; +import edu.kit.kastel.vads.compiler.parser.symbol.Name; +import edu.kit.kastel.vads.compiler.parser.type.BasicType; + +import java.util.ArrayList; +import java.util.List; + +public class Parser { + private final TokenSource tokenSource; + + public Parser(TokenSource tokenSource) { + this.tokenSource = tokenSource; + } + + public ProgramTree parseProgram() { + return new ProgramTree(List.of(parseFunction())); + } + + private FunctionTree parseFunction() { + Keyword returnType = this.tokenSource.expectKeyword(KeywordType.INT); + Identifier identifier = this.tokenSource.expectIdentifier(); + this.tokenSource.expectSeparator(SeparatorType.PAREN_OPEN); + this.tokenSource.expectSeparator(SeparatorType.PAREN_CLOSE); + BlockTree body = parseBlock(); + return new FunctionTree( + new TypeTree(BasicType.INT, returnType.span()), + name(identifier), + body + ); + } + + private BlockTree parseBlock() { + Separator bodyOpen = this.tokenSource.expectSeparator(SeparatorType.BRACE_OPEN); + List statements = new ArrayList<>(); + while (!(this.tokenSource.peek() instanceof Separator sep && sep.type() == SeparatorType.BRACE_CLOSE)) { + statements.add(parseStatement()); + } + Separator bodyClose = this.tokenSource.expectSeparator(SeparatorType.BRACE_CLOSE); + return new BlockTree(statements, bodyOpen.span().merge(bodyClose.span())); + } + + private StatementTree parseStatement() { + 
StatementTree statement; + if (this.tokenSource.peek().isKeyword(KeywordType.INT)) { + statement = parseDeclaration(); + } else if (this.tokenSource.peek().isKeyword(KeywordType.RETURN)) { + statement = parseReturn(); + } else { + statement = parseSimple(); + } + this.tokenSource.expectSeparator(SeparatorType.SEMICOLON); + return statement; + } + + private StatementTree parseDeclaration() { + Keyword type = this.tokenSource.expectKeyword(KeywordType.INT); + Identifier ident = this.tokenSource.expectIdentifier(); + ExpressionTree expr = null; + if (this.tokenSource.peek().isOperator(OperatorType.ASSIGN)) { + this.tokenSource.expectOperator(OperatorType.ASSIGN); + expr = parseExpression(); + } + return new DeclarationTree(new TypeTree(BasicType.INT, type.span()), name(ident), expr); + } + + private StatementTree parseSimple() { + LValueTree lValue = parseLValue(); + Operator assignmentOperator = parseAssignmentOperator(); + ExpressionTree expression = parseExpression(); + return new AssignmentTree(lValue, assignmentOperator, expression); + } + + private Operator parseAssignmentOperator() { + if (this.tokenSource.peek() instanceof Operator op) { + return switch (op.type()) { + case ASSIGN, ASSIGN_DIV, ASSIGN_MINUS, ASSIGN_MOD, ASSIGN_MUL, ASSIGN_PLUS -> { + this.tokenSource.consume(); + yield op; + } + default -> throw new ParseException("expected assignment but got " + op.type()); + }; + } + throw new ParseException("expected assignment but got " + this.tokenSource.peek()); + } + + private LValueTree parseLValue() { + if (this.tokenSource.peek().isSeparator(SeparatorType.PAREN_OPEN)) { + this.tokenSource.expectSeparator(SeparatorType.PAREN_OPEN); + LValueTree inner = parseLValue(); + this.tokenSource.expectSeparator(SeparatorType.PAREN_CLOSE); + return inner; + } + Identifier identifier = this.tokenSource.expectIdentifier(); + return new LValueIdentTree(name(identifier)); + } + + private StatementTree parseReturn() { + Keyword ret = 
this.tokenSource.expectKeyword(KeywordType.RETURN); + ExpressionTree expression = parseExpression(); + return new ReturnTree(expression, ret.span().start()); + } + + private ExpressionTree parseExpression() { + ExpressionTree lhs = parseTerm(); + while (true) { + if (this.tokenSource.peek() instanceof Operator(var type, _) + && (type == OperatorType.PLUS || type == OperatorType.MINUS)) { + this.tokenSource.consume(); + lhs = new BinaryOperationTree(lhs, parseTerm(), type); + } else { + return lhs; + } + } + } + + private ExpressionTree parseTerm() { + ExpressionTree lhs = parseFactor(); + while (true) { + if (this.tokenSource.peek() instanceof Operator(var type, _) + && (type == OperatorType.MUL || type == OperatorType.DIV || type == OperatorType.MOD)) { + this.tokenSource.consume(); + lhs = new BinaryOperationTree(lhs, parseFactor(), type); + } else { + return lhs; + } + } + } + + private ExpressionTree parseFactor() { + return switch (this.tokenSource.peek()) { + case Separator(var type, _) when type == SeparatorType.PAREN_OPEN -> { + this.tokenSource.consume(); + ExpressionTree expression = parseExpression(); + this.tokenSource.expectSeparator(SeparatorType.PAREN_CLOSE); + yield expression; + } + case Operator(var type, _) when type == OperatorType.MINUS -> { + Span span = this.tokenSource.consume().span(); + yield new NegateTree(parseFactor(), span); + } + case Identifier ident -> { + this.tokenSource.consume(); + yield new IdentExpressionTree(name(ident)); + } + case NumberLiteral(String value, int base, Span span) -> { + this.tokenSource.consume(); + yield new LiteralTree(parseValue(value, base), span); + } + case Token t -> throw new ParseException("invalid factor " + t); + }; + } + + private static long parseValue(String value, int base) { + int begin = 0; + int end = value.length(); + if (base == 16) { + begin = 2; // ignore 0x + } + long l; + try { + l = Long.parseLong(value, begin, end, base); + } catch (NumberFormatException _) { + throw new 
ParseException("invalid int literal " + value); + } + if (l < 0 || l > Integer.toUnsignedLong(Integer.MIN_VALUE)) { + throw new ParseException("invalid int literal " + value); + } + return l; + } + + private static NameTree name(Identifier ident) { + return new NameTree(Name.forIdentifier(ident), ident.span()); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/Printer.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/Printer.java new file mode 100644 index 0000000..f4d4d82 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/Printer.java @@ -0,0 +1,138 @@ +package edu.kit.kastel.vads.compiler.parser; + +import edu.kit.kastel.vads.compiler.parser.ast.AssignmentTree; +import edu.kit.kastel.vads.compiler.parser.ast.BinaryOperationTree; +import edu.kit.kastel.vads.compiler.parser.ast.BlockTree; +import edu.kit.kastel.vads.compiler.parser.ast.IdentExpressionTree; +import edu.kit.kastel.vads.compiler.parser.ast.LValueIdentTree; +import edu.kit.kastel.vads.compiler.parser.ast.LiteralTree; +import edu.kit.kastel.vads.compiler.parser.ast.NameTree; +import edu.kit.kastel.vads.compiler.parser.ast.NegateTree; +import edu.kit.kastel.vads.compiler.parser.ast.ReturnTree; +import edu.kit.kastel.vads.compiler.parser.ast.Tree; +import edu.kit.kastel.vads.compiler.parser.ast.DeclarationTree; +import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree; +import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree; +import edu.kit.kastel.vads.compiler.parser.ast.StatementTree; +import edu.kit.kastel.vads.compiler.parser.ast.TypeTree; + +import java.util.List; + +/// This is a utility class to help with debugging the parser. 
+public class Printer { + + private final Tree ast; + private final StringBuilder builder = new StringBuilder(); + private boolean requiresIndent; + private int indentDepth; + + public Printer(Tree ast) { + this.ast = ast; + } + + public static String print(Tree ast) { + Printer printer = new Printer(ast); + printer.printRoot(); + return printer.builder.toString(); + } + + private void printRoot() { + printTree(this.ast); + } + + private void printTree(Tree tree) { + switch (tree) { + case BlockTree(List statements, _) -> { + print("{"); + lineBreak(); + this.indentDepth++; + for (StatementTree statement : statements) { + printTree(statement); + } + this.indentDepth--; + print("}"); + } + case FunctionTree(var returnType, var name, var body) -> { + printTree(returnType); + space(); + printTree(name); + print("()"); + space(); + printTree(body); + } + case NameTree(var name, _) -> print(name.asString()); + case ProgramTree(var topLevelTrees) -> { + for (FunctionTree function : topLevelTrees) { + printTree(function); + lineBreak(); + } + } + case TypeTree(var type, _) -> print(type.asString()); + case BinaryOperationTree(var lhs, var rhs, var op) -> { + print("("); + printTree(lhs); + print(")"); + space(); + this.builder.append(op); + space(); + print("("); + printTree(rhs); + print(")"); + } + case LiteralTree(var value, _) -> this.builder.append(value); + case NegateTree(var expression, _) -> { + print("-("); + printTree(expression); + print(")"); + } + case AssignmentTree(var lValue, var op, var expression) -> { + printTree(lValue); + space(); + this.builder.append(op); + space(); + printTree(expression); + semicolon(); + } + case DeclarationTree(var type, var name, var initializer) -> { + printTree(type); + space(); + printTree(name); + if (initializer != null) { + print(" = "); + printTree(initializer); + } + semicolon(); + } + case ReturnTree(var expr, _) -> { + print("return "); + printTree(expr); + semicolon(); + } + case LValueIdentTree(var name) -> 
printTree(name); + case IdentExpressionTree(var name) -> printTree(name); + } + } + + private void print(String str) { + if (this.requiresIndent) { + this.requiresIndent = false; + this.builder.append(" ".repeat(4 * this.indentDepth)); + } + this.builder.append(str); + } + + private void lineBreak() { + this.builder.append("\n"); + this.requiresIndent = true; + } + + private void semicolon() { + this.builder.append(";"); + lineBreak(); + } + + private void space() { + this.builder.append(" "); + } + +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/TokenSource.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/TokenSource.java new file mode 100644 index 0000000..7790c0a --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/TokenSource.java @@ -0,0 +1,83 @@ +package edu.kit.kastel.vads.compiler.parser; + +import edu.kit.kastel.vads.compiler.lexer.Identifier; +import edu.kit.kastel.vads.compiler.lexer.Keyword; +import edu.kit.kastel.vads.compiler.lexer.KeywordType; +import edu.kit.kastel.vads.compiler.lexer.Lexer; +import edu.kit.kastel.vads.compiler.lexer.Operator; +import edu.kit.kastel.vads.compiler.lexer.Operator.OperatorType; +import edu.kit.kastel.vads.compiler.lexer.Separator; +import edu.kit.kastel.vads.compiler.lexer.Separator.SeparatorType; +import edu.kit.kastel.vads.compiler.lexer.Token; + +import java.util.List; +import java.util.Optional; +import java.util.stream.Stream; + +public class TokenSource { + private final List tokens; + private int idx; + + public TokenSource(Lexer lexer) { + this.tokens = Stream.generate(lexer::nextToken) + .takeWhile(Optional::isPresent) + .map(Optional::orElseThrow) + .toList(); + } + + TokenSource(List tokens) { + this.tokens = List.copyOf(tokens); + } + + public Token peek() { + expectHasMore(); + return this.tokens.get(this.idx); + } + + public Keyword expectKeyword(KeywordType type) { + Token token = peek(); + if (!(token instanceof Keyword kw) || kw.type() != type) { + throw new 
ParseException("expected keyword '" + type + "' but got " + token); + } + this.idx++; + return kw; + } + + public Separator expectSeparator(SeparatorType type) { + Token token = peek(); + if (!(token instanceof Separator sep) || sep.type() != type) { + throw new ParseException("expected separator '" + type + "' but got " + token); + } + this.idx++; + return sep; + } + + public Operator expectOperator(OperatorType type) { + Token token = peek(); + if (!(token instanceof Operator op) || op.type() != type) { + throw new ParseException("expected operator '" + type + "' but got " + token); + } + this.idx++; + return op; + } + public Identifier expectIdentifier() { + Token token = peek(); + if (!(token instanceof Identifier ident)) { + throw new ParseException("expected identifier but got " + token); + } + this.idx++; + return ident; + } + + public Token consume() { + Token token = peek(); + this.idx++; + return token; + } + + private void expectHasMore() { + if (this.idx >= this.tokens.size()) { + throw new ParseException("reached end of file"); + } + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/AssignmentTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/AssignmentTree.java new file mode 100644 index 0000000..ead94e4 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/AssignmentTree.java @@ -0,0 +1,17 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.lexer.Operator; +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record AssignmentTree(LValueTree lValue, Operator operator, ExpressionTree expression) implements StatementTree { + @Override + public Span span() { + return lValue().span().merge(expression().span()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git 
a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/BinaryOperationTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/BinaryOperationTree.java new file mode 100644 index 0000000..df7483a --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/BinaryOperationTree.java @@ -0,0 +1,19 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.lexer.Operator; +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record BinaryOperationTree( + ExpressionTree lhs, ExpressionTree rhs, Operator.OperatorType operatorType +) implements ExpressionTree { + @Override + public Span span() { + return lhs().span().merge(rhs().span()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/BlockTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/BlockTree.java new file mode 100644 index 0000000..01110fd --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/BlockTree.java @@ -0,0 +1,18 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +import java.util.List; + +public record BlockTree(List statements, Span span) implements StatementTree { + + public BlockTree { + statements = List.copyOf(statements); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/DeclarationTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/DeclarationTree.java new file mode 100644 index 0000000..f857b32 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/DeclarationTree.java @@ -0,0 +1,20 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import 
edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; +import org.jspecify.annotations.Nullable; + +public record DeclarationTree(TypeTree type, NameTree name, @Nullable ExpressionTree initializer) implements StatementTree { + @Override + public Span span() { + if (initializer() != null) { + return type().span().merge(initializer().span()); + } + return type().span().merge(name().span()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ExpressionTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ExpressionTree.java new file mode 100644 index 0000000..3e30afa --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ExpressionTree.java @@ -0,0 +1,4 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +public sealed interface ExpressionTree extends Tree permits BinaryOperationTree, IdentExpressionTree, LiteralTree, NegateTree { +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/FunctionTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/FunctionTree.java new file mode 100644 index 0000000..ff66ae5 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/FunctionTree.java @@ -0,0 +1,16 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record FunctionTree(TypeTree returnType, NameTree name, BlockTree body) implements Tree { + @Override + public Span span() { + return new Span.SimpleSpan(returnType().span().start(), body().span().end()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/IdentExpressionTree.java 
b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/IdentExpressionTree.java new file mode 100644 index 0000000..d2f0939 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/IdentExpressionTree.java @@ -0,0 +1,16 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record IdentExpressionTree(NameTree name) implements ExpressionTree { + @Override + public Span span() { + return name().span(); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LValueIdentTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LValueIdentTree.java new file mode 100644 index 0000000..332d8af --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LValueIdentTree.java @@ -0,0 +1,16 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record LValueIdentTree(NameTree name) implements LValueTree { + @Override + public Span span() { + return name().span(); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LValueTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LValueTree.java new file mode 100644 index 0000000..13f6dd2 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LValueTree.java @@ -0,0 +1,4 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +public sealed interface LValueTree extends Tree permits LValueIdentTree { +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LiteralTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LiteralTree.java new file mode 100644 index 0000000..68dca65 --- 
/dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/LiteralTree.java @@ -0,0 +1,11 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record LiteralTree(long value, Span span) implements ExpressionTree { + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/NameTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/NameTree.java new file mode 100644 index 0000000..bf45c10 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/NameTree.java @@ -0,0 +1,12 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.symbol.Name; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record NameTree(Name name, Span span) implements Tree { + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/NegateTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/NegateTree.java new file mode 100644 index 0000000..d7d602a --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/NegateTree.java @@ -0,0 +1,16 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record NegateTree(ExpressionTree expression, Span minusPos) implements ExpressionTree { + @Override + public Span span() { + return minusPos().merge(expression().span()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ProgramTree.java 
b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ProgramTree.java new file mode 100644 index 0000000..eb1f9bc --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ProgramTree.java @@ -0,0 +1,24 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +import java.util.List; + +public record ProgramTree(List topLevelTrees) implements Tree { + public ProgramTree { + assert !topLevelTrees.isEmpty() : "must be non-empty"; + topLevelTrees = List.copyOf(topLevelTrees); + } + @Override + public Span span() { + var first = topLevelTrees.getFirst(); + var last = topLevelTrees.getLast(); + return new Span.SimpleSpan(first.span().start(), last.span().end()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ReturnTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ReturnTree.java new file mode 100644 index 0000000..6d506a0 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/ReturnTree.java @@ -0,0 +1,17 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Position; +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record ReturnTree(ExpressionTree expression, Position start) implements StatementTree { + @Override + public Span span() { + return new Span.SimpleSpan(start(), expression().span().end()); + } + + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/StatementTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/StatementTree.java new file mode 100644 index 0000000..604d985 --- /dev/null +++ 
b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/StatementTree.java @@ -0,0 +1,4 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +public sealed interface StatementTree extends Tree permits AssignmentTree, BlockTree, DeclarationTree, ReturnTree { +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/Tree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/Tree.java new file mode 100644 index 0000000..05fde8a --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/Tree.java @@ -0,0 +1,11 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public sealed interface Tree permits ExpressionTree, FunctionTree, LValueTree, NameTree, ProgramTree, StatementTree, TypeTree { + + Span span(); + + R accept(Visitor visitor, T data); +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/TypeTree.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/TypeTree.java new file mode 100644 index 0000000..49f8017 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/ast/TypeTree.java @@ -0,0 +1,12 @@ +package edu.kit.kastel.vads.compiler.parser.ast; + +import edu.kit.kastel.vads.compiler.Span; +import edu.kit.kastel.vads.compiler.parser.type.Type; +import edu.kit.kastel.vads.compiler.parser.visitor.Visitor; + +public record TypeTree(Type type, Span span) implements Tree { + @Override + public R accept(Visitor visitor, T data) { + return visitor.visit(this, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/IdentName.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/IdentName.java new file mode 100644 index 0000000..07cc94e --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/IdentName.java @@ -0,0 +1,8 @@ +package edu.kit.kastel.vads.compiler.parser.symbol; + +record IdentName(String identifier) implements Name { + 
@Override + public String asString() { + return identifier(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/KeywordName.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/KeywordName.java new file mode 100644 index 0000000..4f4d298 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/KeywordName.java @@ -0,0 +1,10 @@ +package edu.kit.kastel.vads.compiler.parser.symbol; + +import edu.kit.kastel.vads.compiler.lexer.KeywordType; + +record KeywordName(KeywordType type) implements Name { + @Override + public String asString() { + return type().keyword(); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/Name.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/Name.java new file mode 100644 index 0000000..39d8cae --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/symbol/Name.java @@ -0,0 +1,17 @@ +package edu.kit.kastel.vads.compiler.parser.symbol; + +import edu.kit.kastel.vads.compiler.lexer.Identifier; +import edu.kit.kastel.vads.compiler.lexer.Keyword; + +public sealed interface Name permits IdentName, KeywordName { + + static Name forKeyword(Keyword keyword) { + return new KeywordName(keyword.type()); + } + + static Name forIdentifier(Identifier identifier) { + return new IdentName(identifier.value()); + } + + String asString(); +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/type/BasicType.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/type/BasicType.java new file mode 100644 index 0000000..5e93e4a --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/type/BasicType.java @@ -0,0 +1,12 @@ +package edu.kit.kastel.vads.compiler.parser.type; + +import java.util.Locale; + +public enum BasicType implements Type { + INT; + + @Override + public String asString() { + return name().toLowerCase(Locale.ROOT); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/type/Type.java 
b/src/main/java/edu/kit/kastel/vads/compiler/parser/type/Type.java new file mode 100644 index 0000000..72d914c --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/type/Type.java @@ -0,0 +1,5 @@ +package edu.kit.kastel.vads.compiler.parser.type; + +public sealed interface Type permits BasicType { + String asString(); +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/visitor/NoOpVisitor.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/visitor/NoOpVisitor.java new file mode 100644 index 0000000..1bd9521 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/visitor/NoOpVisitor.java @@ -0,0 +1,85 @@ +package edu.kit.kastel.vads.compiler.parser.visitor; + +import edu.kit.kastel.vads.compiler.parser.ast.AssignmentTree; +import edu.kit.kastel.vads.compiler.parser.ast.BinaryOperationTree; +import edu.kit.kastel.vads.compiler.parser.ast.BlockTree; +import edu.kit.kastel.vads.compiler.parser.ast.DeclarationTree; +import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree; +import edu.kit.kastel.vads.compiler.parser.ast.IdentExpressionTree; +import edu.kit.kastel.vads.compiler.parser.ast.LValueIdentTree; +import edu.kit.kastel.vads.compiler.parser.ast.LiteralTree; +import edu.kit.kastel.vads.compiler.parser.ast.NameTree; +import edu.kit.kastel.vads.compiler.parser.ast.NegateTree; +import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree; +import edu.kit.kastel.vads.compiler.parser.ast.ReturnTree; +import edu.kit.kastel.vads.compiler.parser.ast.TypeTree; + +/// A visitor that does nothing and returns [Unit#INSTANCE] by default. +/// This can be used to implement operations only for specific tree types. 
+public interface NoOpVisitor extends Visitor { + + @Override + default Unit visit(AssignmentTree assignmentTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(BinaryOperationTree binaryOperationTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(BlockTree blockTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(DeclarationTree declarationTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(FunctionTree functionTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(IdentExpressionTree identExpressionTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(LiteralTree literalTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(LValueIdentTree lValueIdentTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(NameTree nameTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(NegateTree negateTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(ProgramTree programTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(ReturnTree returnTree, T data) { + return Unit.INSTANCE; + } + + @Override + default Unit visit(TypeTree typeTree, T data) { + return Unit.INSTANCE; + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/parser/visitor/RecursivePostorderVisitor.java b/src/main/java/edu/kit/kastel/vads/compiler/parser/visitor/RecursivePostorderVisitor.java new file mode 100644 index 0000000..5d94125 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/parser/visitor/RecursivePostorderVisitor.java @@ -0,0 +1,134 @@ +package edu.kit.kastel.vads.compiler.parser.visitor; + +import edu.kit.kastel.vads.compiler.parser.ast.AssignmentTree; +import edu.kit.kastel.vads.compiler.parser.ast.BinaryOperationTree; +import edu.kit.kastel.vads.compiler.parser.ast.BlockTree; +import 
edu.kit.kastel.vads.compiler.parser.ast.DeclarationTree; +import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree; +import edu.kit.kastel.vads.compiler.parser.ast.IdentExpressionTree; +import edu.kit.kastel.vads.compiler.parser.ast.LValueIdentTree; +import edu.kit.kastel.vads.compiler.parser.ast.LiteralTree; +import edu.kit.kastel.vads.compiler.parser.ast.NameTree; +import edu.kit.kastel.vads.compiler.parser.ast.NegateTree; +import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree; +import edu.kit.kastel.vads.compiler.parser.ast.ReturnTree; +import edu.kit.kastel.vads.compiler.parser.ast.StatementTree; +import edu.kit.kastel.vads.compiler.parser.ast.TypeTree; + +/// A visitor that traverses a tree in postorder +/// @param a type for additional data +/// @param a type for a return type +public class RecursivePostorderVisitor implements Visitor { + private final Visitor visitor; + + public RecursivePostorderVisitor(Visitor visitor) { + this.visitor = visitor; + } + + @Override + public R visit(AssignmentTree assignmentTree, T data) { + R r = assignmentTree.lValue().accept(this, data); + r = assignmentTree.expression().accept(this, accumulate(data, r)); + r = this.visitor.visit(assignmentTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(BinaryOperationTree binaryOperationTree, T data) { + R r = binaryOperationTree.lhs().accept(this, data); + r = binaryOperationTree.rhs().accept(this, accumulate(data, r)); + r = this.visitor.visit(binaryOperationTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(BlockTree blockTree, T data) { + R r; + T d = data; + for (StatementTree statement : blockTree.statements()) { + r = statement.accept(this, d); + d = accumulate(d, r); + } + r = this.visitor.visit(blockTree, d); + return r; + } + + @Override + public R visit(DeclarationTree declarationTree, T data) { + R r = declarationTree.type().accept(this, data); + r = declarationTree.name().accept(this, accumulate(data, r)); + if 
(declarationTree.initializer() != null) { + r = declarationTree.initializer().accept(this, accumulate(data, r)); + } + r = this.visitor.visit(declarationTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(FunctionTree functionTree, T data) { + R r = functionTree.returnType().accept(this, data); + r = functionTree.name().accept(this, accumulate(data, r)); + r = functionTree.body().accept(this, accumulate(data, r)); + r = this.visitor.visit(functionTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(IdentExpressionTree identExpressionTree, T data) { + R r = identExpressionTree.name().accept(this, data); + r = this.visitor.visit(identExpressionTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(LiteralTree literalTree, T data) { + return this.visitor.visit(literalTree, data); + } + + @Override + public R visit(LValueIdentTree lValueIdentTree, T data) { + R r = lValueIdentTree.name().accept(this, data); + r = this.visitor.visit(lValueIdentTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(NameTree nameTree, T data) { + return this.visitor.visit(nameTree, data); + } + + @Override + public R visit(NegateTree negateTree, T data) { + R r = negateTree.expression().accept(this, data); + r = this.visitor.visit(negateTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(ProgramTree programTree, T data) { + R r; + T d = data; + for (FunctionTree tree : programTree.topLevelTrees()) { + r = tree.accept(this, d); + d = accumulate(data, r); + } + r = this.visitor.visit(programTree, d); + return r; + } + + @Override + public R visit(ReturnTree returnTree, T data) { + R r = returnTree.expression().accept(this, data); + r = this.visitor.visit(returnTree, accumulate(data, r)); + return r; + } + + @Override + public R visit(TypeTree typeTree, T data) { + return this.visitor.visit(typeTree, data); + } + + protected T accumulate(T data, R value) { + return data; + } +} diff 
/// A type with exactly one value, used as the result type of visitors
/// that have nothing meaningful to return.
public enum Unit {
    /// The only value of this type.
    INSTANCE
}
lValueIdentTree, T data); + + R visit(NameTree nameTree, T data); + + R visit(NegateTree negateTree, T data); + + R visit(ProgramTree programTree, T data); + + R visit(ReturnTree returnTree, T data); + + R visit(TypeTree typeTree, T data); +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/semantic/Namespace.java b/src/main/java/edu/kit/kastel/vads/compiler/semantic/Namespace.java new file mode 100644 index 0000000..d96baa8 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/semantic/Namespace.java @@ -0,0 +1,26 @@ +package edu.kit.kastel.vads.compiler.semantic; + +import edu.kit.kastel.vads.compiler.parser.ast.NameTree; +import edu.kit.kastel.vads.compiler.parser.symbol.Name; +import org.jspecify.annotations.Nullable; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.BinaryOperator; + +public class Namespace { + + private final Map content; + + public Namespace() { + this.content = new HashMap<>(); + } + + public void put(NameTree name, T value, BinaryOperator merger) { + this.content.merge(name.name(), value, merger); + } + + public @Nullable T get(NameTree name) { + return this.content.get(name.name()); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/semantic/ReturnAnalysis.java b/src/main/java/edu/kit/kastel/vads/compiler/semantic/ReturnAnalysis.java new file mode 100644 index 0000000..41b0161 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/semantic/ReturnAnalysis.java @@ -0,0 +1,30 @@ +package edu.kit.kastel.vads.compiler.semantic; + +import edu.kit.kastel.vads.compiler.parser.ast.FunctionTree; +import edu.kit.kastel.vads.compiler.parser.ast.ReturnTree; +import edu.kit.kastel.vads.compiler.parser.visitor.NoOpVisitor; +import edu.kit.kastel.vads.compiler.parser.visitor.Unit; + +/// Checks that functions return. +/// Currently only works for straight-line code. 
+class ReturnAnalysis implements NoOpVisitor { + + static class ReturnState { + boolean returns = false; + } + + @Override + public Unit visit(ReturnTree returnTree, ReturnState data) { + data.returns = true; + return NoOpVisitor.super.visit(returnTree, data); + } + + @Override + public Unit visit(FunctionTree functionTree, ReturnState data) { + if (!data.returns) { + throw new SemanticException("function " + functionTree.name() + " does not return"); + } + data.returns = false; + return NoOpVisitor.super.visit(functionTree, data); + } +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/semantic/SemanticAnalysis.java b/src/main/java/edu/kit/kastel/vads/compiler/semantic/SemanticAnalysis.java new file mode 100644 index 0000000..d4f243d --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/semantic/SemanticAnalysis.java @@ -0,0 +1,19 @@ +package edu.kit.kastel.vads.compiler.semantic; + +import edu.kit.kastel.vads.compiler.parser.ast.ProgramTree; +import edu.kit.kastel.vads.compiler.parser.visitor.RecursivePostorderVisitor; + +public class SemanticAnalysis { + + private final ProgramTree program; + + public SemanticAnalysis(ProgramTree program) { + this.program = program; + } + + public void analyze() { + this.program.accept(new RecursivePostorderVisitor<>(new VariableStatusAnalysis()), new Namespace<>()); + this.program.accept(new RecursivePostorderVisitor<>(new ReturnAnalysis()), new ReturnAnalysis.ReturnState()); + } + +} diff --git a/src/main/java/edu/kit/kastel/vads/compiler/semantic/SemanticException.java b/src/main/java/edu/kit/kastel/vads/compiler/semantic/SemanticException.java new file mode 100644 index 0000000..eb26650 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/semantic/SemanticException.java @@ -0,0 +1,7 @@ +package edu.kit.kastel.vads.compiler.semantic; + +public class SemanticException extends RuntimeException { + public SemanticException(String message) { + super(message); + } +} diff --git 
a/src/main/java/edu/kit/kastel/vads/compiler/semantic/VariableStatusAnalysis.java b/src/main/java/edu/kit/kastel/vads/compiler/semantic/VariableStatusAnalysis.java new file mode 100644 index 0000000..e8a8ba5 --- /dev/null +++ b/src/main/java/edu/kit/kastel/vads/compiler/semantic/VariableStatusAnalysis.java @@ -0,0 +1,68 @@ +package edu.kit.kastel.vads.compiler.semantic; + +import edu.kit.kastel.vads.compiler.parser.ast.AssignmentTree; +import edu.kit.kastel.vads.compiler.parser.ast.DeclarationTree; +import edu.kit.kastel.vads.compiler.parser.ast.IdentExpressionTree; +import edu.kit.kastel.vads.compiler.parser.ast.LValueIdentTree; +import edu.kit.kastel.vads.compiler.parser.ast.NameTree; +import edu.kit.kastel.vads.compiler.parser.visitor.NoOpVisitor; +import edu.kit.kastel.vads.compiler.parser.visitor.Unit; +import org.jspecify.annotations.Nullable; + +import java.util.Locale; + +/// Checks that variables are +/// - declared before assignment +/// - not declared twice +/// - not initialized twice +/// - assigned before referenced +class VariableStatusAnalysis implements NoOpVisitor> { + + @Override + public Unit visit(AssignmentTree assignmentTree, Namespace data) { + switch (assignmentTree.lValue()) { + case LValueIdentTree(var name) -> { + VariableStatus status = data.get(name); + checkInitialized(name, status); + } + } + return NoOpVisitor.super.visit(assignmentTree, data); + } + + private static void checkInitialized(NameTree name, @Nullable VariableStatus status) { + if (status == null) { + throw new SemanticException("Variable " + name + " must be declared before assignment"); + } + } + + @Override + public Unit visit(DeclarationTree declarationTree, Namespace data) { + VariableStatus status = declarationTree.initializer() == null + ? 
VariableStatus.DECLARED + : VariableStatus.INITIALIZED; + data.put(declarationTree.name(), status, (existing, replacement) -> { + if (existing.ordinal() >= replacement.ordinal()) { + throw new SemanticException("variable is already " + existing + ". Cannot be " + replacement + " here."); + } + return replacement; + }); + return NoOpVisitor.super.visit(declarationTree, data); + } + + @Override + public Unit visit(IdentExpressionTree identExpressionTree, Namespace data) { + VariableStatus status = data.get(identExpressionTree.name()); + checkInitialized(identExpressionTree.name(), status); + return NoOpVisitor.super.visit(identExpressionTree, data); + } + + enum VariableStatus { + DECLARED, + INITIALIZED; + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } +} diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java new file mode 100644 index 0000000..8553864 --- /dev/null +++ b/src/main/java/module-info.java @@ -0,0 +1,7 @@ +import org.jspecify.annotations.NullMarked; + +@NullMarked +module edu.kit.kastel.vads.compiler { + requires org.jspecify; + requires java.xml; +} \ No newline at end of file