Skip to content

Commit 04361dd

Browse files
committed
Add more missing MachO constants and structs
Signed-off-by: Jakub Konka <[email protected]>
1 parent 427e2d6 commit 04361dd

File tree

1 file changed

+344
-0
lines changed

1 file changed

+344
-0
lines changed

lib/std/macho.zig

Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,182 @@ pub const symtab_command = extern struct {
8181
strsize: u32,
8282
};
8383

84+
/// This is the second set of the symbolic information which is used to support
85+
/// the data structures for the dynamic link editor.
86+
///
87+
/// The original set of symbolic information in the symtab_command which contains
88+
/// the symbol and string tables must also be present when this load command is
89+
/// present. When this load command is present the symbol table is organized
90+
/// into three groups of symbols:
91+
/// local symbols (static and debugging symbols) - grouped by module
92+
/// defined external symbols - grouped by module (sorted by name if not lib)
93+
/// undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
94+
/// and in the order they were seen by the static
95+
/// linker if MH_BINDATLOAD is set)
96+
/// In this load command there are offsets and counts to each of the three groups
97+
/// of symbols.
98+
///
99+
/// This load command contains the offsets and sizes of the following new
100+
/// symbolic information tables:
101+
/// table of contents
102+
/// module table
103+
/// reference symbol table
104+
/// indirect symbol table
105+
/// The first three tables above (the table of contents, module table and
106+
/// reference symbol table) are only present if the file is a dynamically linked
107+
/// shared library. For executable and object modules, which are files
108+
/// containing only one module, the information that would be in these three
109+
/// tables is determined as follows:
110+
/// table of contents - the defined external symbols are sorted by name
111+
/// module table - the file contains only one module so everything in the
112+
/// file is part of the module.
113+
/// reference symbol table - is the defined and undefined external symbols
114+
///
115+
/// For dynamically linked shared library files this load command also contains
116+
/// offsets and sizes to the pool of relocation entries for all sections
117+
/// separated into two groups:
118+
/// external relocation entries
119+
/// local relocation entries
120+
/// For executable and object modules the relocation entries continue to hang
121+
/// off the section structures.
122+
pub const dysymtab_command = extern struct {
123+
/// LC_DYSYMTAB
124+
cmd: u32,
125+
126+
/// sizeof(struct dysymtab_command)
127+
cmdsize: u32,
128+
129+
// The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
130+
// are grouped into the following three groups:
131+
// local symbols (further grouped by the module they are from)
132+
// defined external symbols (further grouped by the module they are from)
133+
// undefined symbols
134+
//
135+
// The local symbols are used only for debugging. The dynamic binding
136+
// process may have to use them to indicate to the debugger the local
137+
// symbols for a module that is being bound.
138+
//
139+
// The last two groups are used by the dynamic binding process to do the
140+
// binding (indirectly through the module table and the reference symbol
141+
// table when this is a dynamically linked shared library file).
142+
143+
/// index of local symbols
144+
ilocalsym: u32,
145+
146+
/// number of local symbols
147+
nlocalsym: u32,
148+
149+
/// index to externally defined symbols
150+
iextdefsym: u32,
151+
152+
/// number of externally defined symbols
153+
nextdefsym: u32,
154+
155+
/// index to undefined symbols
156+
iundefsym: u32,
157+
158+
/// number of undefined symbols
159+
nundefsym: u32,
160+
161+
// For the dynamic binding process to find which module a symbol
162+
// is defined in the table of contents is used (analogous to the ranlib
163+
// structure in an archive) which maps defined external symbols to modules
164+
// they are defined in. This exists only in a dynamically linked shared
165+
// library file. For executable and object modules the defined external
166+
// symbols are sorted by name and are used as the table of contents.
167+
168+
/// file offset to table of contents
169+
tocoff: u32,
170+
171+
/// number of entries in table of contents
172+
ntoc: u32,
173+
174+
// To support dynamic binding of "modules" (whole object files) the symbol
175+
// table must reflect the modules that the file was created from. This is
176+
// done by having a module table that has indexes and counts into the merged
177+
// tables for each module. The module structure that these two entries
178+
// refer to is described below. This exists only in a dynamically linked
179+
// shared library file. For executable and object modules the file only
180+
// contains one module so everything in the file belongs to the module.
181+
182+
/// file offset to module table
183+
modtaboff: u32,
184+
185+
/// number of module table entries
186+
nmodtab: u32,
187+
188+
// To support dynamic module binding the module structure for each module
189+
// indicates the external references (defined and undefined) each module
190+
// makes. For each module there is an offset and a count into the
191+
// reference symbol table for the symbols that the module references.
192+
// This exists only in a dynamically linked shared library file. For
193+
// executable and object modules the defined external symbols and the
194+
// undefined external symbols indicate the external references.
195+
196+
/// offset to referenced symbol table
197+
extrefsymoff: u32,
198+
199+
/// number of referenced symbol table entries
200+
nextrefsyms: u32,
201+
202+
// The sections that contain "symbol pointers" and "routine stubs" have
203+
// indexes and (implied counts based on the size of the section and fixed
204+
// size of the entry) into the "indirect symbol" table for each pointer
205+
// and stub. For every section of these two types the index into the
206+
// indirect symbol table is stored in the section header in the field
207+
// reserved1. An indirect symbol table entry is simply a 32bit index into
208+
// the symbol table to the symbol that the pointer or stub is referring to.
209+
// The indirect symbol table is ordered to match the entries in the section.
210+
211+
/// file offset to the indirect symbol table
212+
indirectsymoff: u32,
213+
214+
/// number of indirect symbol table entries
215+
nindirectsyms: u32,
216+
217+
// To support relocating an individual module in a library file quickly the
218+
// external relocation entries for each module in the library need to be
219+
// accessed efficiently. Since the relocation entries can't be accessed
220+
// through the section headers for a library file they are separated into
221+
// groups of local and external entries further grouped by module. In this
222+
// case the presence of this load command whose extreloff, nextrel,
223+
// locreloff and nlocrel fields are non-zero indicates that the relocation
224+
// entries of non-merged sections are not referenced through the section
225+
// structures (and the reloff and nreloc fields in the section headers are
226+
// set to zero).
227+
//
228+
// Since the relocation entries are not accessed through the section headers
229+
// this requires the r_address field to be something other than a section
230+
// offset to identify the item to be relocated. In this case r_address is
231+
// set to the offset from the vmaddr of the first LC_SEGMENT command.
232+
// For MH_SPLIT_SEGS images r_address is set to the offset from the
233+
// vmaddr of the first read-write LC_SEGMENT command.
234+
//
235+
// The relocation entries are grouped by module and the module table
236+
// entries have indexes and counts into them for the group of external
237+
// relocation entries for that module.
238+
//
239+
// For sections that are merged across modules there must not be any
240+
// remaining external relocation entries for them (for merged sections
241+
// remaining relocation entries must be local).
242+
243+
/// offset to external relocation entries
244+
extreloff: u32,
245+
246+
/// number of external relocation entries
247+
nextrel: u32,
248+
249+
// All the local relocation entries are grouped together (they are not
250+
// grouped by their module since they are only used if the object is moved
251+
// from its statically link edited address).
252+
253+
/// offset to local relocation entries
254+
locreloff: u32,
255+
256+
/// number of local relocation entries
257+
nlocrel: u32,
258+
};
259+
84260
/// The linkedit_data_command contains the offsets and sizes of a blob
85261
/// of data in the __LINKEDIT segment.
86262
pub const linkedit_data_command = extern struct {
@@ -97,6 +273,127 @@ pub const linkedit_data_command = extern struct {
97273
datasize: u32,
98274
};
99275

276+
/// The dyld_info_command contains the file offsets and sizes of
277+
/// the new compressed form of the information dyld needs to
278+
/// load the image. This information is used by dyld on Mac OS X
279+
/// 10.6 and later. All information pointed to by this command
280+
/// is encoded using byte streams, so no endian swapping is needed
281+
/// to interpret it.
282+
pub const dyld_info_command = extern struct {
283+
/// LC_DYLD_INFO or LC_DYLD_INFO_ONLY
284+
cmd: u32,
285+
286+
/// sizeof(struct dyld_info_command)
287+
cmdsize: u32,
288+
289+
// Dyld rebases an image whenever dyld loads it at an address different
290+
// from its preferred address. The rebase information is a stream
291+
// of byte sized opcodes whose symbolic names start with REBASE_OPCODE_.
292+
// Conceptually the rebase information is a table of tuples:
293+
// <seg-index, seg-offset, type>
294+
// The opcodes are a compressed way to encode the table by only
295+
// encoding when a column changes. In addition simple patterns
296+
// like "every n'th offset for m times" can be encoded in a few
297+
// bytes.
298+
299+
/// file offset to rebase info
300+
rebase_off: u32,
301+
302+
/// size of rebase info
303+
rebase_size: u32,
304+
305+
// Dyld binds an image during the loading process, if the image
306+
// requires any pointers to be initialized to symbols in other images.
307+
// The bind information is a stream of byte sized
308+
// opcodes whose symbolic names start with BIND_OPCODE_.
309+
// Conceptually the bind information is a table of tuples:
310+
// <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend>
311+
// The opcodes are a compressed way to encode the table by only
312+
// encoding when a column changes. In addition simple patterns
313+
// like runs of pointers initialized to the same value can be
314+
// encoded in a few bytes.
315+
316+
/// file offset to binding info
317+
bind_off: u32,
318+
319+
/// size of binding info
320+
bind_size: u32,
321+
322+
// Some C++ programs require dyld to unique symbols so that all
323+
// images in the process use the same copy of some code/data.
324+
// This step is done after binding. The content of the weak_bind
325+
// info is an opcode stream like the bind_info. But it is sorted
326+
// alphabetically by symbol name. This enables dyld to walk
327+
// all images with weak binding information in order and look
328+
// for collisions. If there are no collisions, dyld does
329+
// no updating. That means that some fixups are also encoded
330+
// in the bind_info. For instance, all calls to "operator new"
331+
// are first bound to libstdc++.dylib using the information
332+
// in bind_info. Then if some image overrides operator new
333+
// that is detected when the weak_bind information is processed
334+
// and the call to operator new is then rebound.
335+
336+
/// file offset to weak binding info
337+
weak_bind_off: u32,
338+
339+
/// size of weak binding info
340+
weak_bind_size: u32,
341+
342+
// Some uses of external symbols do not need to be bound immediately.
343+
// Instead they can be lazily bound on first use. The lazy_bind
344+
// area contains a stream of BIND opcodes to bind all lazy symbols.
345+
// Normal use is that dyld ignores the lazy_bind section when
346+
// loading an image. Instead the static linker arranged for the
347+
// lazy pointer to initially point to a helper function which
348+
// pushes the offset into the lazy_bind area for the symbol
349+
// needing to be bound, then jumps to dyld which simply adds
350+
// the offset to lazy_bind_off to get the information on what
351+
// to bind.
352+
353+
/// file offset to lazy binding info
354+
lazy_bind_off: u32,
355+
356+
/// size of lazy binding info
357+
lazy_bind_size: u32,
358+
359+
// The symbols exported by a dylib are encoded in a trie. This
360+
// is a compact representation that factors out common prefixes.
361+
// It also reduces LINKEDIT pages in RAM because it encodes all
362+
// information (name, address, flags) in one small, contiguous range.
363+
// The export area is a stream of nodes. The first node sequentially
364+
// is the start node for the trie.
365+
//
366+
// Nodes for a symbol start with a uleb128 that is the length of
367+
// the exported symbol information for the string so far.
368+
// If there is no exported symbol, the node starts with a zero byte.
369+
// If there is exported info, it follows the length.
370+
//
371+
// First is a uleb128 containing flags. Normally, it is followed by
372+
// a uleb128 encoded offset which is location of the content named
373+
// by the symbol from the mach_header for the image. If the flags
374+
// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
375+
// a uleb128 encoded library ordinal, then a zero terminated
376+
// UTF8 string. If the string is zero length, then the symbol
377+
// is re-exported from the specified dylib with the same name.
378+
// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following
379+
// the flags is two uleb128s: the stub offset and the resolver offset.
380+
// The stub is used by non-lazy pointers. The resolver is used
381+
// by lazy pointers and must be called to get the actual address to use.
382+
//
383+
// After the optional exported symbol information is a byte of
384+
// how many edges (0-255) that this node has leaving it,
385+
// followed by each edge.
386+
// Each edge is a zero terminated UTF8 string of the additional chars
387+
// in the symbol, followed by a uleb128 offset for the node that
388+
// edge points to.
389+
390+
/// file offset to export info
391+
export_off: u32,
392+
393+
/// size of export info
394+
export_size: u32,
395+
};
396+
100397
/// A program that uses a dynamic linker contains a dylinker_command to identify
101398
/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker
102399
/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
@@ -681,6 +978,24 @@ pub const N_TYPE = 0x0e;
681978
/// external symbol bit, set for external symbols
682979
pub const N_EXT = 0x01;
683980

981+
/// symbol is undefined
982+
pub const N_UNDF = 0x0;
983+
984+
/// symbol is absolute
985+
pub const N_ABS = 0x2;
986+
987+
/// symbol is defined in the section number given in n_sect
988+
pub const N_SECT = 0xe;
989+
990+
/// symbol is undefined and the image is using a prebound
991+
/// value for the symbol
992+
pub const N_PBUD = 0xc;
993+
994+
/// symbol is defined to be the same as another symbol; the n_value
995+
/// field is an index into the string table specifying the name of the
996+
/// other symbol
997+
pub const N_INDR = 0xa;
998+
684999
/// global symbol: name,,NO_SECT,type,0
6851000
pub const N_GSYM = 0x20;
6861001

@@ -781,6 +1096,35 @@ pub const N_LENG = 0xfe;
7811096
/// a debug section
7821097
pub const S_ATTR_DEBUG = 0x02000000;
7831098

1099+
/// section contains only true machine instructions
1100+
pub const S_ATTR_PURE_INSTRUCTIONS = 0x80000000;
1101+
1102+
/// section contains coalesced symbols that are not to be in a ranlib
1103+
/// table of contents
1104+
pub const S_ATTR_NO_TOC = 0x40000000;
1105+
1106+
/// ok to strip static symbols in this section in files with the
1107+
/// MH_DYLDLINK flag
1108+
pub const S_ATTR_STRIP_STATIC_SYMS = 0x20000000;
1109+
1110+
/// no dead stripping
1111+
pub const S_ATTR_NO_DEAD_STRIP = 0x10000000;
1112+
1113+
/// blocks are live if they reference live blocks
1114+
pub const S_ATTR_LIVE_SUPPORT = 0x8000000;
1115+
1116+
/// used with i386 code stubs written on by dyld
1117+
pub const S_ATTR_SELF_MODIFYING_CODE = 0x4000000;
1118+
1119+
/// section contains some machine instructions
1120+
pub const S_ATTR_SOME_INSTRUCTIONS = 0x400;
1121+
1122+
/// section has external relocation entries
1123+
pub const S_ATTR_EXT_RELOC = 0x200;
1124+
1125+
/// section has local relocation entries
1126+
pub const S_ATTR_LOC_RELOC = 0x100;
1127+
7841128
pub const cpu_type_t = integer_t;
7851129
pub const cpu_subtype_t = integer_t;
7861130
pub const integer_t = c_int;

0 commit comments

Comments
 (0)