diff --git a/examples/extensible-interface-files/core-plugins/basic-example/Example.hs b/examples/extensible-interface-files/core-plugins/basic-example/Example.hs new file mode 100755 index 0000000..46c88e3 --- /dev/null +++ b/examples/extensible-interface-files/core-plugins/basic-example/Example.hs @@ -0,0 +1,332 @@ +{-# LANGUAGE ScopedTypeVariables, TupleSections #-} + +module Example where + +import GHC.Data.Bag +import GHC.Plugins +import GHC.Iface.Load hiding (loadCore) +import GHC.Iface.Syntax +import GHC.IfaceToCore hiding (tcIfaceModGuts) +import GHC.Tc.Types +import GHC.Tc.Utils.Monad +import GHC.Hs.Binds +import GHC.Hs.Extension +import GHC.Core.ConLike +import GHC.Iface.Env +import GHC.Iface.Binary +import GHC.Core.InstEnv +import GHC.Core.FamInstEnv +import GHC.CoreToIface +import GHC.Types.Name.Env +import Control.Monad +import GHC.Hs.Expr +import GHC (getModuleGraph) +import Data.IORef +import GHC.Types.Unique.DFM +import System.IO.Unsafe +import Data.Maybe +import GHC.Data.Maybe + + +{- +We add our plugins to the core compiler pipeline. This plugin stage passes us +the existing passes, including those added by GHC and other (previous) plugins, +and expects back a list of passes that we want to include. In this case, we +don't want to remove any existing passes, so we append our passes to the start +and end of the core pipeline. +-} +plugin :: Plugin +plugin = defaultPlugin { installCoreToDos = install } + + +{- +Our modified core pipeline is as follows: +* `plutus` - using the `inline` plugin flag: + an inlining plugin that mimicks binding AST traversal as a stand-in + for Plutus +* `todos` - the existing core pipeline given by GHC +* `bindsPass` - print the bindings (after the inliner has optionally run), for `diff`ing +* `printer` - using the `print` plugin flag: + pretty-print the `HscEnv` `ModGuts` to the console, for debugging +* `serialiser` - using the `inline` plugin flag: + output core ASTs for bindings into the interface file +-} +install :: [CommandLineOption] -> [CoreToDo] -> CoreM [CoreToDo] +install args todos = return $ join [plutusPass, todos, bindsPass, printerPass, serialiserPass] + where + plutusPass | elem "inline" args = [CoreDoPluginPass "plutus" plutus] + | otherwise = [] + printerPass | elem "print" args = [CoreDoPluginPass "printer" printer] + | otherwise = [] + bindsPass = [CoreDoPluginPass "binds" bindsPlug] + serialiserPass | elem "inline" args = [CoreDoPluginPass "serialiser" serialiser] + | otherwise = [] + + +{- +A core plugin pass to print the `mg_binds` to the console. +We return the argument unchanged. +-} +bindsPlug :: ModGuts -> CoreM ModGuts +bindsPlug guts = do + env <- getHscEnv + liftIO . putStrLn . showSDoc (hsc_dflags env) . ppr $ mg_binds guts + return guts + + +{- +A core plugin pass to print the `HscEnv` and `ModGuts` to the console. +We return the argument unchanged. +-} +printer :: ModGuts -> CoreM ModGuts +printer guts = do + env <- getHscEnv + liftIO $ do + printHscEnv env + printModGuts env guts + return guts + + +{- +From the `HscEnv`, we're interested in: +* The `ModuleGraph`: + * In one-shot mode, this is empty, because all modues are treated as external + * In normal mode, this contains the current module, and all of its dependencies + from the home package + * We can extract a list of the `ModSummary`s of these modules using `mgModSummaries` +* The `HomePackageTable`: + * This is a Map (as a `UDFM`) keyed on module name `Unique`s to lookup a structure + containing the `ModIface` and `ModDetails` of a module + * This contains the previously compiled modules, but not the current module. + Because dependencies are always compiled first, we can (in normal mode) assume + that the dependencies will exist here + * Again, this is empty in one-shot mode +* The `ExternalPackageState`: + * This is stored in an IORef, and is modified by opening `ModIface`s of external + modules + * Since one-shot mode treats all modules as external, we will find the home package + modules + * The `PackageIfaceTable` is a Map (as a `ModuleEnv`) keyed on `Module`s to lookup + `ModIface`s +-} +printHscEnv :: HscEnv -> IO () +printHscEnv (HscEnv dflags targets mod_graph _ic hpt eps' nc' _fc' _type_env' _iserv' _dynlinker _ext) = do + eps <- readIORef eps' + nc <- readIORef nc' + putStrLn . showSDoc dflags $ vcat + [ text "***HscEnv***" + , ppr targets + , ppr (mgModSummaries mod_graph) + , ppr (mapUDFM (mi_module . hm_iface) hpt) + , ppr (eltsUFM $ eps_is_boot eps) + , ppr (moduleEnvKeys $ eps_PIT eps) + , text "!!!HscEnv!!!" + ] + +{- +From the `ModGuts`, we can retrieve the actual core of the exported bindings for this +module, under the `mg_binds` record field, as a `type CoreProgram = [Bind CoreBndr]`. +-} +printModGuts :: HscEnv -> ModGuts -> IO () +printModGuts env (ModGuts mod _hsc_src _loc _exports deps _usages _used_th rdr_env _fix_env _tcs _insts _fam_insts _patsyns _rules binds _foreign _foreign_files + _warns _anns _complete_sigs _hpc_info _modBreaks _inst_env _fam_inst_env _safe_haskell _trust_pkg _doc_hdr _decl_dogs _arg_docs + ) = do + putStrLn . showSDoc (hsc_dflags env) $ vcat + [ text "***ModGuts***" + , ppr mod + , ppr (dep_mods deps) + , ppr (dep_pkgs deps) + , ppr (dep_orphs deps) + , ppr (dep_finsts deps) + , ppr (dep_plgins deps) + , ppr rdr_env + , ppr binds + , text "!!!ModGuts!!!" + ] + + +{- +Here we use the `registerInterfaceDataWith` machinery of extensible interface files +to record our serialised data as a field in the `HscEnv`, which later gets added to +the `ModGuts` to be written with the `.hi` interface file. + +Because the core bindings contain GHC `Name`s and `FastString`s, which are serialised +in a lookup table, we need to use `putWithUserData` to write to a raw `BinHandle`. If +our data didn't contain either of these types, we could use `registerInterfaceData` +to avoid the raw handle and instead go via the `GHC.Binary` instance. + +Note that the loading function will later require the `SrcSpan` for error reporting, +so we serialise the `mg_loc` here too. +-} +serialiser :: ModGuts -> CoreM ModGuts +serialiser guts = do + env <- getHscEnv + liftIO . registerInterfaceDataWith "plutus/core-bindings" env $ \bh -> + putWithUserData (const $ return ()) bh (mg_loc guts, map toIfaceBind $ mg_binds guts) + return guts + + +{- +For our Plutus stand-in, we first retrieve the `HscEnv`, which is required to lookup +`Name`s within the `HomePackageTable` and `PackageIfaceTable` to perform the knot-tying +of serialised `Iface`* structures into proper in-memory reference-based structures. + +We want to cache the binds we load, so we use `newLoadBind` to initialise the `IORef` +for this - giving us an `IO` function mapping `Name`s to `Bind Id`s. +-} +plutus :: ModGuts -> CoreM ModGuts +plutus guts = do + env <- getHscEnv + lookup <- liftIO $ newLoadBind env guts + binds' <- liftIO $ mapM (inlineCore lookup) (mg_binds guts) + return guts{ mg_binds = binds' } + + +{- +Bindings in GHC have two cases: +* a single regular binding, `NonRec`: + * top-level bindings that reference each-other are included in this case +* mutually recursive bindings, `Rec`, including: + * bindings with multiple equations + * self-referential bindings, such as `xs = ():xs`. +-} +inlineCore :: (Name -> IO (Maybe (Bind CoreBndr))) -> Bind Id -> IO (Bind Id) +inlineCore lookup (NonRec n expr) = NonRec n <$> inlineCoreExpr lookup expr +inlineCore lookup (Rec pairs) = Rec <$> mapM inlineCorePair pairs + where + inlineCorePair (n, b) = (n,) <$> inlineCoreExpr lookup b + + +{- +We recurse the structure of the `Bind Id` AST to replace one level of `Name`s +with the core data we retrieve using the `lookup` function, if we find the +core in the extensible interface field. + +The main case here is the `Var v` constructor, which contains a `Name` that +we want to potentially inline. +-} +inlineCoreExpr :: (Name -> IO (Maybe (Bind CoreBndr))) -> Expr Id -> IO (Expr Id) +inlineCoreExpr lookup = go + where + go :: Expr Id -> IO (Expr Id) + go (Var v) = do + look <- lookup (varName v) + return $ case look of + Just (NonRec _ expr) -> expr + Nothing -> Var v + go (Lit l ) = return $ Lit l + go (App e1 e2 ) = App <$> go e1 <*> go e2 + go (Lam b e ) = Lam b <$> go e + go (Let b e ) = Let b <$> go e + go (Case e b t alts) = Case e b t <$> mapM goAlt alts + go (Cast e coer ) = (`Cast` coer) <$> go e + go (Tick ti e ) = Tick ti <$> go e + go (Type t ) = return $ Type t + go (Coercion c ) = return $ Coercion c + + goAlt (con, bndrs, rhs) = (con, bndrs,) <$> go rhs + + +{- +Retrieve the name of a top-level binding. In the case of recursive bindings, +we assume (based on which types of bindings we have determined become top-level +recursive bindings): +* The binding has at least one case +* All cases have the same `Name`. +-} +nameOf :: Bind Id -> Name +nameOf (NonRec n _) = idName n +nameOf (Rec ((n, _):_)) = idName n + + +{- +Perform interface `typecheck` loading from this binding's extensible interface +field within the deserialised `ModIface` to load the bindings that the field +contains, if the field exists. +-} +loadCoreBindings :: ModIface -> IfL (Maybe [Bind CoreBndr]) +loadCoreBindings iface@ModIface{mi_module = mod} = do + ncu <- mkNameCacheUpdater + mbinds <- liftIO (readIfaceFieldWith "plutus/core-bindings" (getWithUserData ncu) iface) + case mbinds of + Just (loc, ibinds) -> Just . catMaybes <$> mapM (tcIfaceBinding mod loc) ibinds + Nothing -> return Nothing + + +{- +Initialise a stateful `IO` function for loading core bindings by loading the +relevant `ModIface` from disk. Each interface that is loaded has its bindings +cached within an `IORef (ModuleEnv (Maybe (NameEnv (Bind CoreBndr))))`. + +The current module doesn't have an interface file yet, so we recover its binds +from the `ModGuts` instead. However, since we're running this plugin early in +the core pipeline, the current module's binds won't have passed through the +core optimisation phases yet, so if we're inlining `Name`s from this module +we'll have to rely on the optimisations being run on our already inlined ASTs. +-} +newLoadBind :: HscEnv -> ModGuts -> IO (Name -> IO (Maybe (Bind CoreBndr))) +newLoadBind hscEnv guts = do + let binds = mkNameEnvWith nameOf (mg_binds guts) + modBindsR <- newIORef (extendModuleEnv emptyModuleEnv (mg_module guts) (Just binds)) + return (loadBind modBindsR hscEnv) + +{- +Return the `Bind` for the given `Name`. Cache the results of disk lookups. +-} +loadBind :: IORef (ModuleEnv (Maybe (NameEnv (Bind CoreBndr)))) + -> HscEnv + -> Name + -> IO (Maybe (Bind CoreBndr)) +loadBind modBindsR env name = do + eps <- hscEPS env + modBinds <- readIORef modBindsR + case nameModule_maybe name of + Just mod | Just iface <- lookupIfaceByModule (hsc_HPT env) (eps_PIT eps) mod -> do + case lookupModuleEnv modBinds mod of + Just Nothing -> return Nothing -- We've already checked this module, and it doesn't have bindings + -- serialised - probably because it's from an external package, + -- but it could also have not been compiled with the plugin. + Just (Just binds) -> return $ lookupNameEnv binds name -- We've imported this module - lookup the binding. + Nothing -> do -- Try and import the module. + bnds <- initIfaceLoad env $ + initIfaceLcl (mi_semantic_module iface) (text "core") NotBoot $ + loadCoreBindings iface + case bnds of + Just bds -> do + let binds' = mkNameEnvWith nameOf bds + writeIORef modBindsR (extendModuleEnv modBinds mod (Just binds')) + return $ lookupNameEnv binds' name + Nothing -> do + writeIORef modBindsR (extendModuleEnv modBinds mod Nothing) + return Nothing + _ -> return Nothing + +------------------------------------------------------------------------------- +-- Interface loading for top-level bindings +------------------------------------------------------------------------------- + +{- +Certain RHSs fail to typecheck due to the error `Iface id out of scope: ...`. +In particular, this workaround is used to exclude GHC's special type reflection +bindings from causing problems in loading. + +In theory, we should be removing them during serialisation, but they are structured +as real bindings, so we would have to do a fragile test on the `Name`. +-} +tcIfaceBinding :: Module -> SrcSpan -> IfaceBinding -> IfL (Maybe (Bind Id)) +tcIfaceBinding mod loc ibind = + rightToMaybe <$> tryAllM (tcIfaceBinding' mod loc ibind) + +tcIfaceBinding' :: Module -> SrcSpan -> IfaceBinding -> IfL (Bind Id) +tcIfaceBinding' mod loc b = + case b of + IfaceNonRec letbndr rhs -> uncurry NonRec <$> go letbndr rhs + IfaceRec pairs -> Rec <$> mapM (uncurry go) pairs + where + go (IfLetBndr fs ty info ji) rhs = do + name <- lookupIfaceTop (mkVarOccFS fs) + ty' <- tcIfaceType ty + rhs' <- tcIfaceExpr rhs + let id = mkExportedVanillaId name ty' `asJoinId_maybe` tcJoinInfo ji + return (id, rhs') + diff --git a/examples/extensible-interface-files/core-plugins/basic-example/Lib.hs b/examples/extensible-interface-files/core-plugins/basic-example/Lib.hs new file mode 100755 index 0000000..f57f2ee --- /dev/null +++ b/examples/extensible-interface-files/core-plugins/basic-example/Lib.hs @@ -0,0 +1,56 @@ +module Lib (f, xs, g, x, y, infinite, mapMyData, sumMyData, MyData(..)) where + +-- Prevent GHC from inlining the bindings from this module to demonstrate +-- the plugin doing so. +{-# NOINLINE a #-} +{-# NOINLINE b #-} +{-# NOINLINE x #-} +{-# NOINLINE y #-} +{-# NOINLINE f #-} +{-# NOINLINE xs #-} +{-# NOINLINE g #-} +{-# NOINLINE infinite #-} +{-# NOINLINE mapMyData #-} +{-# NOINLINE sumMyData #-} + +-- Not exported, to test the serialised bindings +a, b :: Int +a = 0 +b = a + +x, y :: Int +x = 1 +y = x + 2 + +f :: Int -> Int +f = plus 1 + where + plus = (+) + +-- Test for self-recursive bindings +xs :: [()] +xs = ():xs + +-- Test for multiple-equation bindings +g :: Int -> Bool +g 0 = True +g _ = False + +infinite :: a -> [a] +infinite x = x : infinite x + +data MyData a b + = Leaf a b + | Something Int (MyData a b) + | Pair (MyData a b) (MyData a b) + deriving Show + +mapMyData :: (a -> b -> (c, d)) -> MyData a b -> MyData c d +mapMyData fn (Leaf x y) = uncurry Leaf (fn x y) +mapMyData fn (Something n x) = Something n (mapMyData fn x) +mapMyData fn (Pair x1 x2) = Pair (mapMyData fn x1) (mapMyData fn x2) + +sumMyData :: MyData a b -> Int +sumMyData Leaf{} = 0 +sumMyData (Something n x) = n + sumMyData x +sumMyData (Pair x1 x2) = sumMyData x1 + sumMyData x2 diff --git a/examples/extensible-interface-files/core-plugins/basic-example/Main.hs b/examples/extensible-interface-files/core-plugins/basic-example/Main.hs new file mode 100755 index 0000000..16a5e4c --- /dev/null +++ b/examples/extensible-interface-files/core-plugins/basic-example/Main.hs @@ -0,0 +1,47 @@ +module Main where + +import Lib + +main :: IO () +main = do + + -- Simpler examples + print (t x y) + print (mapMyData fn_md md1) + print (sumMyData md1) + + -- This example both tests the inliner's ability to correctly inline composed + -- functions and compares this being done within an expression with it being + -- done in a top-level binding. + print (sumMyData (mapMyData fn_md md1)) + print (sumMapMyData md1) + +t :: Int -> Int -> Bool +t a b = g (f a + b) + +md1 :: MyData Int Int +md1 = Pair + (Pair (Leaf 1 2) + (Leaf 3 4)) + (Pair (Something 5 (Leaf 6 7)) + (Leaf 8 9)) + +md2 :: MyData () () +md2 = Leaf () () + +md3 :: MyData () () +md3 = Something 0 md2 + +-- This structure is infinite on the last field, so we want to make sure that +-- the inlining takes that into account and only inlines the first field. +-- We should consider that Plutus will need to know about the infinite recursion +-- when the AST is produced, so it's important that we can detect where this +-- reference happens. +md4 :: MyData () () +md4 = Pair md3 md4 + +fn_md :: Int -> Int -> (Bool, Int) +fn_md x y = (x == y, x + y) + +sumMapMyData :: MyData Int Int -> Int +sumMapMyData x = sumMyData (mapMyData fn_md x) diff --git a/examples/extensible-interface-files/core-plugins/basic-example/Makefile b/examples/extensible-interface-files/core-plugins/basic-example/Makefile new file mode 100755 index 0000000..f5fcda0 --- /dev/null +++ b/examples/extensible-interface-files/core-plugins/basic-example/Makefile @@ -0,0 +1,41 @@ +# default to ghc, but allow us to override the compiler used by providing +# HC, e.g. HC=/path/to/ghc-stage2 make ... +HC ?= ghc +# Same for HC_FLAGS (e.g. HC_FLAGS="-v"). +# We'll *always* add -dynamic to it though. +# +# Need -dynamic, otherwise ghc will complain. +# it appears it defaults to loading dyn_o +# plugins. +# +# : fatal: +# cannot find object file ‘./Example.dyn_o’ +# while linking an interpreted expression +HC_FLAGS += -dynamic +diff: vanilla core-inline + diff -u vanilla.core inlined.core +vanilla: + mkdir -p vanilla + cd vanilla && $(HC) $(HC_FLAGS) -odir . -hidir . ../Example.hs -package ghc + cd vanilla && $(HC) $(HC_FLAGS) -odir . -hidir . -fplugin Example -c ../Lib.hs + cd vanilla && $(HC) $(HC_FLAGS) -odir . -hidir . -fplugin Example -c ../Main.hs -o ../VanillaMain | tee ../vanilla.core +# I get a lot of errors with not being able to find modules properly if I use commands +# like the vanilla version. This seems to work, but I would like to figure out what +# causes the actual problem. +# +# There are a variety of bugs with plugins and locating them. Unless we package the +# Example plugin into it's own package, ghc will complain about +# +# attempting to use module ‘main:Example’ (Example.hs) which is not loaded +# +# in the final linking step; which makes little sense. +core-inline: + mkdir -p inlined + cd inlined && $(HC) $(HC_FLAGS) -odir . -hidir . ../Example.hs -package ghc + cd inlined && $(HC) $(HC_FLAGS) -odir . -hidir . -fplugin Example -fplugin-opt Example:inline -c ../Lib.hs + cd inlined && $(HC) $(HC_FLAGS) -odir . -hidir . -fplugin Example -fplugin-opt Example:inline -c ../Main.hs -o ../InlinedMain | tee ../inlined.core +clean: + rm -rf vanilla inlined vanilla.core inlined.core VanillaMain InlinedMain +# Launch ghcid for instant-feedback type-checking +ghcid: + ghcid --command="ghci Main.hs Lib.hs Example.hs -package ghc" diff --git a/examples/extensible-interface-files/core-plugins/basic-example/README.md b/examples/extensible-interface-files/core-plugins/basic-example/README.md new file mode 100644 index 0000000..8d154e5 --- /dev/null +++ b/examples/extensible-interface-files/core-plugins/basic-example/README.md @@ -0,0 +1,98 @@ +## Motivation + +Plutus acts as an embedded-language compiler hosted within GHC Haskell to compile a Haskell program +into a Plutus AST, which will then be used in code generation for programs to be run on the blockchain. + +In order to recover a Plutus AST from a Haskell function, the Plutus plugin inspects the Core of +bindings to essentially inline the AST and provide that to the Plutus compiler. But, plugins are +only fed the bindings of the current module, so we must recover the AST represented by any external +names in a different way. + +Currently, without extensible interface files, Plutus uses the unfoldings to recover the Core +represented by a name from an external module. Unfoldings are normally used by GHC perform regular +Haskell inlining. However, this has a number of disadvantages: +* The user needs to annotate all such external bindings with `INLINE` +* Alternately, the `-f-expose-all-unfoldings` flag can be used to equivalently generate unfoldings + for all bindings in a project or file +* Annotating bindings with `INLINABLE` almost guarantees that a binding is inlined, so doing so + will very likely cause efficiency problems in code generation. Similarly, the presence of the + unfoldings produced by the flag also mess with GHC's inlining algorithm. + +Instead, we want to demonstrate a more semantically correct version of this using core bindings +exposed in extensible interface files. Extensible interface files allow us to store arbirary +serialisable data within the regular interface files, including data from a non-GHC source. +Additionally, extensible interface files aim to give `.hi` files a more well-defined structure, +so that external tools can interact with them. With this extensibility, we add a flag to GHC to +serialise the `ModGuts` into an interface file field. The `ModGuts` represents the entire compilation +state of a module after the Core pipeline has been run, before it's later converted to STG. +Among other things, the `ModGuts` contains the core bindings for all exported definitions in a +module. + +This example project intends to show the process of writing the core interface field, loading it +within the `IfG a`/`IfL a` interface global/local loading environments, and finally using that data +to lookup the right-hand sides of external names that appear in the core that is given in the plugin +environment of the currently compiling module. + +## Deliverables + +* Example structure of a plugin using extensible interface files and the `-fwrite-core-field` flag +* Usage of loading the `IfG`/`IfL` (interface load) monad environments into `IO`, which can then be + lifted into the plugin action via `MonadIO` +* Inlining the plugin binds based on the core recovered from interface files of dependencies + +## Design and Implementation + +We define a `Lib` module, which contains stand-in definitions representing external bindings that +would require being looked up if their names appeared in the right-hand sides of the definitions +in another module. This module is compiled with the `-fwrite-core-field` flag, so its interface +file will contain a field that can be deserialised into a `ModGuts`. + +Then, the `Lib` module is imported into the `Main` module, which then makes use of these imported +definitions in its own definitions. `Main` is then compiled with the `Example` plugin, which +performs a version of inlining to immitate Plutus. Because `Lib`'s definitions are external to +`Main`, when the plugin encounters a name from `Lib`, it will have to recover the core from `Lib.hs`'s +core extensible interface field. + +The `Example` module, containing the post-type checking plugin, makes use of the core found in +interface files to perform a form of inlining to demonstrate usage for Plutus. The current module's +bindings are found in the plugin argument, `TcGblEnv`, and the `TcM` monad in which the plugin +operates gives us access to other environment data that we require to load interfaces. + +## Details + +* Loading interface files is done in the `type IfL a = IOEnv (Env IfGbl IfLcl) a` monad, while + core plugins are in the `CoreM` enviroment. `IfL` is meant to be eventually run in `IO`, and + `CoreM` is `MonadIO`, so it can be lifted into the plugin, but initialising the interface loading + does require some extra data: + * We must first initialise the global enviroment, which requires the `HscEnv`, which can be + retrieved from `getHscEnv`. This `HscEnv` differs based on the current module being compiled + * For each external module that is being imported, the local enviroment must be initialised with + its `Module` data structure, essentially setting that one as active. These `Module`s are able + to be recovered from the module graph contained in the `HscEnv` + +## Building + +### GHC + +With https://gitlab.haskell.org/ghc/ghc/-/tree/wip/pluginExtFields run: + +``` +./boot +./configure +make -j8 +sudo make install +``` + +Then include export `/usr/local/bin` into the current session's path. + +Additionally, `configure` can be passed a `--prefix=/my/ghc/install/path` flag as an alternative +to `/usr/local/bin`. + +### This Package + +Makefile targets: +* diff (default): build both with and without the plugin, and compare the resulting core +* vanilla: build without the plugin, and output the core to vanilla.core +* core-inline: build with the plugin, and output the core to inlined.core +* clean: clean build results +* ghcid: run ghcid to provide instant-feedback type-checking