Skip to content

Example plugin #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
{-# LANGUAGE ScopedTypeVariables, TupleSections #-}

module Example where

import GHC.Data.Bag
import GHC.Plugins
import GHC.Iface.Load hiding (loadCore)
import GHC.Iface.Syntax
import GHC.IfaceToCore hiding (tcIfaceModGuts)
import GHC.Tc.Types
import GHC.Tc.Utils.Monad
import GHC.Hs.Binds
import GHC.Hs.Extension
import GHC.Core.ConLike
import GHC.Iface.Env
import GHC.Iface.Binary
import GHC.Core.InstEnv
import GHC.Core.FamInstEnv
import GHC.CoreToIface
import GHC.Types.Name.Env
import Control.Monad
import GHC.Hs.Expr
import GHC (getModuleGraph)
import Data.IORef
import GHC.Types.Unique.DFM
import System.IO.Unsafe
import Data.Maybe
import GHC.Data.Maybe


{-
We add our plugins to the core compiler pipeline. This plugin stage passes us
the existing passes, including those added by GHC and other (previous) plugins,
and expects back a list of passes that we want to include. In this case, we
don't want to remove any existing passes, so we append our passes to the start
and end of the core pipeline.
-}
plugin :: Plugin
plugin = defaultPlugin { installCoreToDos = install }


{-
Our modified core pipeline is as follows:
* `plutus` - using the `inline` plugin flag:
an inlining plugin that mimicks binding AST traversal as a stand-in
for Plutus
* `todos` - the existing core pipeline given by GHC
* `bindsPass` - print the bindings (after the inliner has optionally run), for `diff`ing
* `printer` - using the `print` plugin flag:
pretty-print the `HscEnv` `ModGuts` to the console, for debugging
* `serialiser` - using the `inline` plugin flag:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed on Slack, this is the bit we really need to control via a GHC flag if we can.

output core ASTs for bindings into the interface file
-}
install :: [CommandLineOption] -> [CoreToDo] -> CoreM [CoreToDo]
install args todos = return $ join [plutusPass, todos, bindsPass, printerPass, serialiserPass]
where
plutusPass | elem "inline" args = [CoreDoPluginPass "plutus" plutus]
| otherwise = []
printerPass | elem "print" args = [CoreDoPluginPass "printer" printer]
| otherwise = []
bindsPass = [CoreDoPluginPass "binds" bindsPlug]
serialiserPass | elem "inline" args = [CoreDoPluginPass "serialiser" serialiser]
| otherwise = []


{-
A core plugin pass to print the `mg_binds` to the console.
We return the argument unchanged.
-}
bindsPlug :: ModGuts -> CoreM ModGuts
bindsPlug guts = do
env <- getHscEnv
liftIO . putStrLn . showSDoc (hsc_dflags env) . ppr $ mg_binds guts
return guts


{-
A core plugin pass to print the `HscEnv` and `ModGuts` to the console.
We return the argument unchanged.
-}
printer :: ModGuts -> CoreM ModGuts
printer guts = do
env <- getHscEnv
liftIO $ do
printHscEnv env
printModGuts env guts
return guts


{-
From the `HscEnv`, we're interested in:
* The `ModuleGraph`:
* In one-shot mode, this is empty, because all modues are treated as external
* In normal mode, this contains the current module, and all of its dependencies
from the home package
* We can extract a list of the `ModSummary`s of these modules using `mgModSummaries`
* The `HomePackageTable`:
* This is a Map (as a `UDFM`) keyed on module name `Unique`s to lookup a structure
containing the `ModIface` and `ModDetails` of a module
* This contains the previously compiled modules, but not the current module.
Because dependencies are always compiled first, we can (in normal mode) assume
that the dependencies will exist here
* Again, this is empty in one-shot mode
* The `ExternalPackageState`:
* This is stored in an IORef, and is modified by opening `ModIface`s of external
modules
* Since one-shot mode treats all modules as external, we will find the home package
modules
* The `PackageIfaceTable` is a Map (as a `ModuleEnv`) keyed on `Module`s to lookup
`ModIface`s
-}
printHscEnv :: HscEnv -> IO ()
printHscEnv (HscEnv dflags targets mod_graph _ic hpt eps' nc' _fc' _type_env' _iserv' _dynlinker _ext) = do
eps <- readIORef eps'
nc <- readIORef nc'
putStrLn . showSDoc dflags $ vcat
[ text "***HscEnv***"
, ppr targets
, ppr (mgModSummaries mod_graph)
, ppr (mapUDFM (mi_module . hm_iface) hpt)
, ppr (eltsUFM $ eps_is_boot eps)
, ppr (moduleEnvKeys $ eps_PIT eps)
, text "!!!HscEnv!!!"
]

{-
From the `ModGuts`, we can retrieve the actual core of the exported bindings for this
module, under the `mg_binds` record field, as a `type CoreProgram = [Bind CoreBndr]`.
-}
printModGuts :: HscEnv -> ModGuts -> IO ()
printModGuts env (ModGuts mod _hsc_src _loc _exports deps _usages _used_th rdr_env _fix_env _tcs _insts _fam_insts _patsyns _rules binds _foreign _foreign_files
_warns _anns _complete_sigs _hpc_info _modBreaks _inst_env _fam_inst_env _safe_haskell _trust_pkg _doc_hdr _decl_dogs _arg_docs
) = do
putStrLn . showSDoc (hsc_dflags env) $ vcat
[ text "***ModGuts***"
, ppr mod
, ppr (dep_mods deps)
, ppr (dep_pkgs deps)
, ppr (dep_orphs deps)
, ppr (dep_finsts deps)
, ppr (dep_plgins deps)
, ppr rdr_env
, ppr binds
, text "!!!ModGuts!!!"
]


{-
Here we use the `registerInterfaceDataWith` machinery of extensible interface files
to record our serialised data as a field in the `HscEnv`, which later gets added to
the `ModGuts` to be written with the `.hi` interface file.

Because the core bindings contain GHC `Name`s and `FastString`s, which are serialised
in a lookup table, we need to use `putWithUserData` to write to a raw `BinHandle`. If
our data didn't contain either of these types, we could use `registerInterfaceData`
to avoid the raw handle and instead go via the `GHC.Binary` instance.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking: is this stuff documented in GHC for less savvy users?


Note that the loading function will later require the `SrcSpan` for error reporting,
so we serialise the `mg_loc` here too.
-}
serialiser :: ModGuts -> CoreM ModGuts
serialiser guts = do
env <- getHscEnv
liftIO . registerInterfaceDataWith "plutus/core-bindings" env $ \bh ->
putWithUserData (const $ return ()) bh (mg_loc guts, map toIfaceBind $ mg_binds guts)
return guts


{-
For our Plutus stand-in, we first retrieve the `HscEnv`, which is required to lookup
`Name`s within the `HomePackageTable` and `PackageIfaceTable` to perform the knot-tying
of serialised `Iface`* structures into proper in-memory reference-based structures.

We want to cache the binds we load, so we use `newLoadBind` to initialise the `IORef`
for this - giving us an `IO` function mapping `Name`s to `Bind Id`s.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another advantage of having this in GHC would be hiding this stuff from poor users 😅 although for our usecase it's fine if it's hairy.

-}
plutus :: ModGuts -> CoreM ModGuts
plutus guts = do
env <- getHscEnv
lookup <- liftIO $ newLoadBind env guts
binds' <- liftIO $ mapM (inlineCore lookup) (mg_binds guts)
return guts{ mg_binds = binds' }


{-
Bindings in GHC have two cases:
* a single regular binding, `NonRec`:
* top-level bindings that reference each-other are included in this case
* mutually recursive bindings, `Rec`, including:
* bindings with multiple equations
* self-referential bindings, such as `xs = ():xs`.
-}
inlineCore :: (Name -> IO (Maybe (Bind CoreBndr))) -> Bind Id -> IO (Bind Id)
inlineCore lookup (NonRec n expr) = NonRec n <$> inlineCoreExpr lookup expr
inlineCore lookup (Rec pairs) = Rec <$> mapM inlineCorePair pairs

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just while I'm here, I'd probably write something like:

Rec <$> for pairs $ \(n, b) -> (n,) <$> inlineCoreExpr lookup b

where
inlineCorePair (n, b) = (n,) <$> inlineCoreExpr lookup b


{-
We recurse the structure of the `Bind Id` AST to replace one level of `Name`s
with the core data we retrieve using the `lookup` function, if we find the
core in the extensible interface field.

The main case here is the `Var v` constructor, which contains a `Name` that
we want to potentially inline.
-}
inlineCoreExpr :: (Name -> IO (Maybe (Bind CoreBndr))) -> Expr Id -> IO (Expr Id)
inlineCoreExpr lookup = go
where
go :: Expr Id -> IO (Expr Id)
go (Var v) = do
look <- lookup (varName v)
return $ case look of
Just (NonRec _ expr) -> expr

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about recursive bindings?

Nothing -> Var v
go (Lit l ) = return $ Lit l
go (App e1 e2 ) = App <$> go e1 <*> go e2
go (Lam b e ) = Lam b <$> go e
go (Let b e ) = Let b <$> go e
go (Case e b t alts) = Case e b t <$> mapM goAlt alts

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do the same trick with for to make inlining goAlts palatable.

go (Cast e coer ) = (`Cast` coer) <$> go e

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW I prefer the stupider but IMO clearer Cast <$> go e <*> pure coer

go (Tick ti e ) = Tick ti <$> go e
go (Type t ) = return $ Type t
go (Coercion c ) = return $ Coercion c

goAlt (con, bndrs, rhs) = (con, bndrs,) <$> go rhs


{-
Retrieve the name of a top-level binding. In the case of recursive bindings,
we assume (based on which types of bindings we have determined become top-level
recursive bindings):
* The binding has at least one case
* All cases have the same `Name`.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔

Is this really right? What about

odd = ... even ...
even = ... odd ...

?

-}
nameOf :: Bind Id -> Name
nameOf (NonRec n _) = idName n
nameOf (Rec ((n, _):_)) = idName n


{-
Perform interface `typecheck` loading from this binding's extensible interface
field within the deserialised `ModIface` to load the bindings that the field
contains, if the field exists.
-}
loadCoreBindings :: ModIface -> IfL (Maybe [Bind CoreBndr])
loadCoreBindings iface@ModIface{mi_module = mod} = do
ncu <- mkNameCacheUpdater
mbinds <- liftIO (readIfaceFieldWith "plutus/core-bindings" (getWithUserData ncu) iface)
case mbinds of
Just (loc, ibinds) -> Just . catMaybes <$> mapM (tcIfaceBinding mod loc) ibinds
Nothing -> return Nothing


{-
Initialise a stateful `IO` function for loading core bindings by loading the
relevant `ModIface` from disk. Each interface that is loaded has its bindings
cached within an `IORef (ModuleEnv (Maybe (NameEnv (Bind CoreBndr))))`.

The current module doesn't have an interface file yet, so we recover its binds
from the `ModGuts` instead. However, since we're running this plugin early in
the core pipeline, the current module's binds won't have passed through the
core optimisation phases yet, so if we're inlining `Name`s from this module
we'll have to rely on the optimisations being run on our already inlined ASTs.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine. Although it also depends when we do the serialization of the ModGuts for dependent modules, no? If we do it before we do lots of optimization then we'll also get unoptimized code from them. I think we probably should do it relatively early, but I'm not sure.

-}
newLoadBind :: HscEnv -> ModGuts -> IO (Name -> IO (Maybe (Bind CoreBndr)))
newLoadBind hscEnv guts = do
let binds = mkNameEnvWith nameOf (mg_binds guts)
modBindsR <- newIORef (extendModuleEnv emptyModuleEnv (mg_module guts) (Just binds))
return (loadBind modBindsR hscEnv)

{-
Return the `Bind` for the given `Name`. Cache the results of disk lookups.
-}
loadBind :: IORef (ModuleEnv (Maybe (NameEnv (Bind CoreBndr))))
-> HscEnv
-> Name
-> IO (Maybe (Bind CoreBndr))
loadBind modBindsR env name = do
eps <- hscEPS env
modBinds <- readIORef modBindsR
case nameModule_maybe name of
Just mod | Just iface <- lookupIfaceByModule (hsc_HPT env) (eps_PIT eps) mod -> do
case lookupModuleEnv modBinds mod of
Just Nothing -> return Nothing -- We've already checked this module, and it doesn't have bindings
-- serialised - probably because it's from an external package,
-- but it could also have not been compiled with the plugin.
Just (Just binds) -> return $ lookupNameEnv binds name -- We've imported this module - lookup the binding.
Nothing -> do -- Try and import the module.
bnds <- initIfaceLoad env $
initIfaceLcl (mi_semantic_module iface) (text "core") NotBoot $
loadCoreBindings iface
case bnds of
Just bds -> do
let binds' = mkNameEnvWith nameOf bds
writeIORef modBindsR (extendModuleEnv modBinds mod (Just binds'))
return $ lookupNameEnv binds' name
Nothing -> do
writeIORef modBindsR (extendModuleEnv modBinds mod Nothing)
return Nothing
_ -> return Nothing

-------------------------------------------------------------------------------
-- Interface loading for top-level bindings
-------------------------------------------------------------------------------

{-
Certain RHSs fail to typecheck due to the error `Iface id out of scope: ...`.
In particular, this workaround is used to exclude GHC's special type reflection
bindings from causing problems in loading.

In theory, we should be removing them during serialisation, but they are structured
as real bindings, so we would have to do a fragile test on the `Name`.
-}
tcIfaceBinding :: Module -> SrcSpan -> IfaceBinding -> IfL (Maybe (Bind Id))
tcIfaceBinding mod loc ibind =
rightToMaybe <$> tryAllM (tcIfaceBinding' mod loc ibind)

tcIfaceBinding' :: Module -> SrcSpan -> IfaceBinding -> IfL (Bind Id)
tcIfaceBinding' mod loc b =
case b of
IfaceNonRec letbndr rhs -> uncurry NonRec <$> go letbndr rhs
IfaceRec pairs -> Rec <$> mapM (uncurry go) pairs
where
go (IfLetBndr fs ty info ji) rhs = do
name <- lookupIfaceTop (mkVarOccFS fs)
ty' <- tcIfaceType ty
rhs' <- tcIfaceExpr rhs
let id = mkExportedVanillaId name ty' `asJoinId_maybe` tcJoinInfo ji
return (id, rhs')

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
module Lib (f, xs, g, x, y, infinite, mapMyData, sumMyData, MyData(..)) where

-- Prevent GHC from inlining the bindings from this module to demonstrate
-- the plugin doing so.
{-# NOINLINE a #-}
{-# NOINLINE b #-}
{-# NOINLINE x #-}
{-# NOINLINE y #-}
{-# NOINLINE f #-}
{-# NOINLINE xs #-}
{-# NOINLINE g #-}
{-# NOINLINE infinite #-}
{-# NOINLINE mapMyData #-}
{-# NOINLINE sumMyData #-}

-- Not exported, to test the serialised bindings
a, b :: Int
a = 0
b = a

x, y :: Int
x = 1
y = x + 2

f :: Int -> Int
f = plus 1
where
plus = (+)

-- Test for self-recursive bindings
xs :: [()]
xs = ():xs

-- Test for multiple-equation bindings
g :: Int -> Bool
g 0 = True
g _ = False

infinite :: a -> [a]
infinite x = x : infinite x

data MyData a b
= Leaf a b
| Something Int (MyData a b)
| Pair (MyData a b) (MyData a b)
deriving Show

mapMyData :: (a -> b -> (c, d)) -> MyData a b -> MyData c d
mapMyData fn (Leaf x y) = uncurry Leaf (fn x y)
mapMyData fn (Something n x) = Something n (mapMyData fn x)
mapMyData fn (Pair x1 x2) = Pair (mapMyData fn x1) (mapMyData fn x2)

sumMyData :: MyData a b -> Int
sumMyData Leaf{} = 0
sumMyData (Something n x) = n + sumMyData x
sumMyData (Pair x1 x2) = sumMyData x1 + sumMyData x2
Loading