Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
mjit_worker.c
Go to the documentation of this file.
1/**********************************************************************
2
3 mjit_worker.c - Worker for MRI method JIT compiler
4
5 Copyright (C) 2017 Vladimir Makarov <vmakarov@redhat.com>.
6
7**********************************************************************/
8
9// NOTE: All functions in this file are executed on MJIT worker. So don't
10// call Ruby methods (C functions that may call rb_funcall) or trigger
11// GC (using ZALLOC, xmalloc, xfree, etc.) in this file.
12
13/* However, note that calling `free` for resources `xmalloc`-ed in mjit.c,
14 which is currently done in some places, is sometimes problematic in the
15 following situations:
16
17 * malloc library could be different between interpreter and extensions
18 on Windows (perhaps not applicable to MJIT because CC is the same)
19 * xmalloc -> free leaks extra space used for USE_GC_MALLOC_OBJ_INFO_DETAILS
20 (not enabled by default)
21
22 ...in short, it's usually not a problem in MJIT. But maybe it's worth
23 fixing for consistency or for USE_GC_MALLOC_OBJ_INFO_DETAILS support.
24*/
25
26/* We utilize widely used C compilers (GCC and LLVM Clang) to
27 implement MJIT. We feed them a C code generated from ISEQ. The
28 industrial C compilers are slower than regular JIT engines.
29 Generated code performance of the used C compilers has a higher
30 priority over the compilation speed.
31
32 So our major goal is to minimize the ISEQ compilation time when we
33 use a widely used optimization level (-O2). It is achieved by
34
35 o Using a precompiled version of the header
36 o Keeping all files in `/tmp`. On modern Linux `/tmp` is a file
37 system in memory. So it is pretty fast
38 o Implementing MJIT as a multi-threaded code because we want to
39 compile ISEQs in parallel with iseq execution to speed up Ruby
40 code execution. MJIT has one thread (*worker*) to do
41 parallel compilations:
42 o It prepares a precompiled code of the minimized header.
43 It starts at the MRI execution start
44 o It generates PIC object files of ISEQs
45 o It takes one JIT unit from a priority queue unless it is empty.
46 o It translates the JIT unit ISEQ into C-code using the precompiled
47 header, calls CC and loads PIC code when it is ready
48 o Currently MJIT puts ISEQ in the queue when ISEQ is called
49 o MJIT can reorder ISEQs in the queue if some ISEQ has been called
50 many times and its compilation did not start yet
51 o MRI reuses the machine code if it already exists for ISEQ
52 o The machine code we generate can stop and switch to the ISEQ
53 interpretation if some condition is not satisfied as the machine
54 code can be speculative or some exception is raised
55 o Speculative machine code can be canceled.
56
57 Here is a diagram showing the MJIT organization:
58
59 _______
60 |header |
61 |_______|
62 | MRI building
63 --------------|----------------------------------------
64 | MRI execution
65 |
66 _____________|_____
67 | | |
68 | ___V__ | CC ____________________
69 | | |----------->| precompiled header |
70 | | | | |____________________|
71 | | | | |
72 | | MJIT | | |
73 | | | | |
74 | | | | ____V___ CC __________
75 | |______|----------->| C code |--->| .so file |
76 | | |________| |__________|
77 | | |
78 | | |
79 | MRI machine code |<-----------------------------
80 |___________________| loading
81
82*/
83
84#ifdef __sun
85#define __EXTENSIONS__ 1
86#endif
87
88#include "vm_core.h"
89#include "vm_callinfo.h"
90#include "mjit.h"
91#include "gc.h"
92#include "ruby_assert.h"
93#include "ruby/debug.h"
94#include "ruby/thread.h"
95#include "ruby/version.h"
96#include "builtin.h"
97#include "insns.inc"
98#include "insns_info.inc"
99#include "internal/compile.h"
100
101#ifdef _WIN32
102#include <winsock2.h>
103#include <windows.h>
104#else
105#include <sys/wait.h>
106#include <sys/time.h>
107#include <dlfcn.h>
108#endif
109#include <errno.h>
110#ifdef HAVE_FCNTL_H
111#include <fcntl.h>
112#endif
113#ifdef HAVE_SYS_PARAM_H
114# include <sys/param.h>
115#endif
116#include "dln.h"
117
118#include "ruby/util.h"
119#undef strdup // ruby_strdup may trigger GC
120
121#ifndef MAXPATHLEN
122# define MAXPATHLEN 1024
123#endif
124
125#ifdef _WIN32
126#define dlopen(name,flag) ((void*)LoadLibrary(name))
127#define dlerror() strerror(rb_w32_map_errno(GetLastError()))
128#define dlsym(handle,name) ((void*)GetProcAddress((handle),(name)))
129#define dlclose(handle) (!FreeLibrary(handle))
130#define RTLD_NOW -1
131
132#define waitpid(pid,stat_loc,options) (WaitForSingleObject((HANDLE)(pid), INFINITE), GetExitCodeProcess((HANDLE)(pid), (LPDWORD)(stat_loc)), CloseHandle((HANDLE)pid), (pid))
133#define WIFEXITED(S) ((S) != STILL_ACTIVE)
134#define WEXITSTATUS(S) (S)
135#define WIFSIGNALED(S) (0)
136typedef intptr_t pid_t;
137#endif
138
139// Atomically set function pointer if possible.
140#define MJIT_ATOMIC_SET(var, val) (void)ATOMIC_PTR_EXCHANGE(var, val)
141
142#define MJIT_TMP_PREFIX "_ruby_mjit_"
143
144// JIT compaction requires the header transformation because linking multiple .o files
145// doesn't work without having `static` in the same function definitions. We currently
146// don't support transforming the MJIT header on Windows.
147#ifdef _WIN32
148# define USE_JIT_COMPACTION 0
149#else
150# define USE_JIT_COMPACTION 1
151#endif
152
153// The unit structure that holds metadata of ISeq for MJIT.
155 struct list_node unode;
156 // Unique order number of unit.
157 int id;
158 // Dlopen handle of the loaded object file.
159 void *handle;
161#if defined(_WIN32)
162 // DLL cannot be removed while loaded on Windows. If this is set, it'll be lazily deleted.
163 char *so_file;
164#endif
165 // Only used by unload_units. Flag to check this unit is currently on stack or not.
167 // True if this is still in active_units but it's to be lazily removed
169 // mjit_compile's optimization switches
170 struct rb_mjit_compile_info compile_info;
171 // captured CC values, they should be marked with iseq.
172 const struct rb_callcache **cc_entries;
173 unsigned int cc_entries_size; // iseq->body->ci_size + ones of inlined iseqs
174};
175
176// Linked list of struct rb_mjit_unit.
178 struct list_head head;
179 int length; // the list length
180};
181
182extern void rb_native_mutex_lock(rb_nativethread_lock_t *lock);
183extern void rb_native_mutex_unlock(rb_nativethread_lock_t *lock);
184extern void rb_native_mutex_initialize(rb_nativethread_lock_t *lock);
185extern void rb_native_mutex_destroy(rb_nativethread_lock_t *lock);
186
191extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex);
192
193// process.c
194extern rb_pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options, rb_nativethread_cond_t *cond);
195
196// A copy of MJIT portion of MRI options since MJIT initialization. We
197// need them as MJIT threads still can work when the most MRI data were
198// freed.
199struct mjit_options mjit_opts;
200
201// true if MJIT is enabled.
202bool mjit_enabled = false;
203// true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS`
204// and `mjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible.
205bool mjit_call_p = false;
206
207// Priority queue of iseqs waiting for JIT compilation.
208// This variable is a pointer to head unit of the queue.
209static struct rb_mjit_unit_list unit_queue = { LIST_HEAD_INIT(unit_queue.head) };
210// List of units which are successfully compiled.
211static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) };
212// List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`.
213static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) };
214// List of units before recompilation and just waiting for dlclose().
215static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) };
216// The number of so far processed ISEQs, used to generate unique id.
217static int current_unit_num;
218// A mutex for conditionals and critical sections.
219static rb_nativethread_lock_t mjit_engine_mutex;
220// A thread conditional to wake up `mjit_finish` at the end of PCH thread.
221static rb_nativethread_cond_t mjit_pch_wakeup;
222// A thread conditional to wake up the client if there is a change in
223// executed unit status.
224static rb_nativethread_cond_t mjit_client_wakeup;
225// A thread conditional to wake up a worker if we have something
226// to add or we need to stop MJIT engine.
227static rb_nativethread_cond_t mjit_worker_wakeup;
228// A thread conditional to wake up workers at the end of GC.
229static rb_nativethread_cond_t mjit_gc_wakeup;
230// Greater than 0 when GC is working.
231static int in_gc = 0;
232// True when JIT is working.
233static bool in_jit = false;
234// True when active_units has at least one stale_p=true unit.
235static bool pending_stale_p = false;
236// The times when unload_units is requested. unload_units is called after some requests.
237static int unload_requests = 0;
238// The total number of unloaded units.
239static int total_unloads = 0;
240// Set to true to stop worker.
241static bool stop_worker_p;
242// Set to true if worker is stopped.
243static bool worker_stopped;
244
245// Path of "/tmp", which can be changed to $TMP in MinGW.
246static char *tmp_dir;
247// Hash like { 1 => true, 2 => true, ... } whose keys are valid `class_serial`s.
248// This is used to invalidate obsoleted CALL_CACHE.
249static VALUE valid_class_serials;
250
251// Used C compiler path.
252static const char *cc_path;
253// Used C compiler flags.
254static const char **cc_common_args;
255// Used C compiler flags added by --jit-debug=...
256static char **cc_added_args;
257// Name of the precompiled header file.
258static char *pch_file;
259// The process id which should delete the pch_file on mjit_finish.
260static rb_pid_t pch_owner_pid;
261// Status of the precompiled header creation. The status is
262// shared by the workers and the pch thread.
263static enum {PCH_NOT_READY, PCH_FAILED, PCH_SUCCESS} pch_status;
264
265#ifndef _MSC_VER
266// Name of the header file.
267static char *header_file;
268#endif
269
270#ifdef _WIN32
271// Linker option to enable libruby.
272static char *libruby_pathflag;
273#endif
274
275#include "mjit_config.h"
276
277#if defined(__GNUC__) && \
278 (!defined(__clang__) || \
279 (defined(__clang__) && (defined(__FreeBSD__) || defined(__GLIBC__))))
280# define GCC_PIC_FLAGS "-Wfatal-errors", "-fPIC", "-shared", "-w", "-pipe",
281# define MJIT_CFLAGS_PIPE 1
282#else
283# define GCC_PIC_FLAGS /* empty */
284# define MJIT_CFLAGS_PIPE 0
285#endif
286
287// Use `-nodefaultlibs -nostdlib` for GCC where possible, which does not work on mingw, cygwin, AIX, and OpenBSD.
288// This seems to improve MJIT performance on GCC.
289#if defined __GNUC__ && !defined __clang__ && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__)
290# define GCC_NOSTDLIB_FLAGS "-nodefaultlibs", "-nostdlib",
291#else
292# define GCC_NOSTDLIB_FLAGS // empty
293#endif
294
295static const char *const CC_COMMON_ARGS[] = {
297 NULL
298};
299
300static const char *const CC_DEBUG_ARGS[] = {MJIT_DEBUGFLAGS NULL};
301static const char *const CC_OPTIMIZE_ARGS[] = {MJIT_OPTFLAGS NULL};
302
303static const char *const CC_LDSHARED_ARGS[] = {MJIT_LDSHARED GCC_PIC_FLAGS NULL};
304static const char *const CC_DLDFLAGS_ARGS[] = {MJIT_DLDFLAGS NULL};
305// `CC_LINKER_ARGS` are linker flags which must be passed to `-c` as well.
306static const char *const CC_LINKER_ARGS[] = {
307#if defined __GNUC__ && !defined __clang__ && !defined(__OpenBSD__)
308 "-nostartfiles",
309#endif
311};
312
313static const char *const CC_LIBS[] = {
314#if defined(_WIN32) || defined(__CYGWIN__)
315 MJIT_LIBS // mswin, mingw, cygwin
316#endif
317#if defined __GNUC__ && !defined __clang__
318# if defined(_WIN32)
319 "-lmsvcrt", // mingw
320# endif
321 "-lgcc", // mingw, cygwin, and GCC platforms using `-nodefaultlibs -nostdlib`
322#endif
323#if defined __ANDROID__
324 "-lm", // to avoid 'cannot locate symbol "modf" referenced by .../_ruby_mjit_XXX.so"'
325#endif
326 NULL
327};
328
329#define CC_CODEFLAG_ARGS (mjit_opts.debug ? CC_DEBUG_ARGS : CC_OPTIMIZE_ARGS)
330
331// Print the arguments according to FORMAT to stderr only if MJIT
332// verbose option value is more or equal to LEVEL.
333PRINTF_ARGS(static void, 2, 3)
334verbose(int level, const char *format, ...)
335{
336 if (mjit_opts.verbose >= level) {
337 va_list args;
338 size_t len = strlen(format);
339 char *full_format = alloca(sizeof(char) * (len + 2));
340
341 // Creating `format + '\n'` to atomically print format and '\n'.
342 memcpy(full_format, format, len);
343 full_format[len] = '\n';
344 full_format[len+1] = '\0';
345
346 va_start(args, format);
347 vfprintf(stderr, full_format, args);
348 va_end(args);
349 }
350}
351
352PRINTF_ARGS(static void, 1, 2)
353mjit_warning(const char *format, ...)
354{
355 if (mjit_opts.warnings || mjit_opts.verbose) {
356 va_list args;
357
358 fprintf(stderr, "MJIT warning: ");
359 va_start(args, format);
360 vfprintf(stderr, format, args);
361 va_end(args);
362 fprintf(stderr, "\n");
363 }
364}
365
366// Add unit node to the tail of doubly linked `list`. It should be not in
367// the list before.
368static void
369add_to_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
370{
371 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_unit_queue, list == &unit_queue);
372 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_active_units, list == &active_units);
373 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_compact_units, list == &compact_units);
374 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_stale_units, list == &stale_units);
375
376 list_add_tail(&list->head, &unit->unode);
377 list->length++;
378}
379
380static void
381remove_from_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
382{
383#if USE_DEBUG_COUNTER
384 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_unit_queue, -1, list == &unit_queue);
385 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_active_units, -1, list == &active_units);
386 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_compact_units, -1, list == &compact_units);
387 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_stale_units, -1, list == &stale_units);
388#endif
389
390 list_del(&unit->unode);
391 list->length--;
392}
393
// Delete `filename`; on failure report the reason through mjit_warning()
// instead of propagating an error.
static void
remove_file(const char *filename)
{
    if (remove(filename) == 0) return;

    mjit_warning("failed to remove \"%s\": %s", filename, strerror(errno));
}
401
// Perform the deferred removal of a unit's .so file. Only Windows records a
// pending path in `unit->so_file`; on other platforms this is a no-op.
static void
clean_temp_files(struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    char *pending = unit->so_file;
    if (pending == NULL) return;

    // Detach the path from the unit before deleting so it is handled once.
    unit->so_file = NULL;
    // unit->so_file is set only when mjit_opts.save_temps is false.
    remove_file(pending);
    free(pending);
#endif
}
417
418// This is called in the following situations:
419// 1) On dequeue or `unload_units()`, associated ISeq is already GCed.
420// 2) The unit is not called often and unloaded by `unload_units()`.
421// 3) Freeing lists on `mjit_finish()`.
422//
423// `jit_func` value does not matter for 1 and 3 since the unit won't be used anymore.
424// For the situation 2, this sets the ISeq's JIT state to NOT_COMPILED_JIT_ISEQ_FUNC
425// to prevent the situation that the same methods are continuously compiled.
426static void
427free_unit(struct rb_mjit_unit *unit)
428{
429 if (unit->iseq) { // ISeq is not GCed
430 unit->iseq->body->jit_func = (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
431 unit->iseq->body->jit_unit = NULL;
432 }
433 if (unit->cc_entries) {
434 void *entries = (void *)unit->cc_entries;
435 free(entries);
436 }
437 if (unit->handle && dlclose(unit->handle)) { // handle is NULL if it's in queue
438 mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror());
439 }
440 clean_temp_files(unit);
441 free(unit);
442}
443
444// Start a critical section. Use message `msg` to print debug info at `level`.
445static inline void
446CRITICAL_SECTION_START(int level, const char *msg)
447{
448 verbose(level, "Locking %s", msg);
449 rb_native_mutex_lock(&mjit_engine_mutex);
450 verbose(level, "Locked %s", msg);
451}
452
453// Finish the current critical section. Use message `msg` to print
454// debug info at `level`.
455static inline void
456CRITICAL_SECTION_FINISH(int level, const char *msg)
457{
458 verbose(level, "Unlocked %s", msg);
459 rb_native_mutex_unlock(&mjit_engine_mutex);
460}
461
462static int
463sprint_uniq_filename(char *str, size_t size, unsigned long id, const char *prefix, const char *suffix)
464{
465 return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, getpid(), id, suffix);
466}
467
// Current time in milliseconds, as a double. On macOS the implementation is
// provided elsewhere as ruby_real_ms_time(); otherwise clock_gettime() is used
// when available (monotonic clock if the platform defines it), falling back
// to gettimeofday().
#ifdef __APPLE__
double ruby_real_ms_time(void);
# define real_ms_time() ruby_real_ms_time()
#else
static double
real_ms_time(void)
{
# ifdef HAVE_CLOCK_GETTIME
#  ifdef CLOCK_MONOTONIC
    const clockid_t clock_id = CLOCK_MONOTONIC;
#  else
    const clockid_t clock_id = CLOCK_REALTIME;
#  endif
    struct timespec now;

    clock_gettime(clock_id, &now);
    const double sub_ms = now.tv_nsec / 1000000.0; // nanoseconds -> ms
    const double whole_ms = now.tv_sec * 1000.0;   // seconds -> ms
    return sub_ms + whole_ms;
# else
    struct timeval now;

    gettimeofday(&now, NULL);
    const double sub_ms = now.tv_usec / 1000.0;    // microseconds -> ms
    const double whole_ms = now.tv_sec * 1000.0;   // seconds -> ms
    return sub_ms + whole_ms;
# endif
}
#endif
494
495// Return true if class_serial is not obsoleted. This is used by mjit_compile.c.
496bool
498{
499 CRITICAL_SECTION_START(3, "in valid_class_serial_p");
500 bool found_p = rb_hash_stlike_lookup(valid_class_serials, LONG2FIX(class_serial), NULL);
501 CRITICAL_SECTION_FINISH(3, "in valid_class_serial_p");
502 return found_p;
503}
504
505// Return the best unit from list. The best is the first
506// high priority unit or the unit whose iseq has the biggest number
507// of calls so far.
508static struct rb_mjit_unit *
509get_from_list(struct rb_mjit_unit_list *list)
510{
511 while (in_gc) {
512 verbose(3, "Waiting wakeup from GC");
513 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
514 }
515 in_jit = true; // Lock GC
516
517 // Find iseq with max total_calls
518 struct rb_mjit_unit *unit = NULL, *next, *best = NULL;
519 list_for_each_safe(&list->head, unit, next, unode) {
520 if (unit->iseq == NULL) { // ISeq is GCed.
521 remove_from_list(unit, list);
522 free_unit(unit);
523 continue;
524 }
525
526 if (best == NULL || best->iseq->body->total_calls < unit->iseq->body->total_calls) {
527 best = unit;
528 }
529 }
530
531 in_jit = false; // Unlock GC
532 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
533 rb_native_cond_signal(&mjit_client_wakeup);
534
535 if (best) {
536 remove_from_list(best, list);
537 }
538 return best;
539}
540
// Count the entries of the NULL-terminated string array `args`; the
// terminating NULL itself is not counted.
static size_t
args_len(char *const *args)
{
    size_t count = 0;

    while (args[count] != NULL) {
        count++;
    }
    return count;
}
551
// Concatenate `num` NULL-terminated arrays of strings (passed as variadic
// `char **` arguments) into one freshly allocated, NULL-terminated array.
//
// Only the pointers are copied; the strings themselves are shared with the
// input arrays. The caller owns the returned array and releases it with
// free(). Returns NULL if memory could not be allocated (any partial result is
// freed first) and also when `num` is 0, since nothing is allocated then.
static char **
form_args(int num, ...)
{
    va_list argp;
    char **res = NULL;
    size_t len = 0; // number of non-NULL entries accumulated so far

    va_start(argp, num);
    for (int i = 0; i < num; i++) {
        char **args = va_arg(argp, char **);
        size_t n = args_len(args);
        char **tmp = realloc(res, sizeof(char *) * (len + n + 1));
        if (tmp == NULL) {
            free(res);
            res = NULL;
            break;
        }
        res = tmp;
        // Copy n entries plus the NULL terminator; the next array overwrites
        // that terminator. Plain memcpy keeps this worker-side helper on libc
        // only (see the note at the top of the file about not using Ruby
        // runtime facilities here). The byte count is the one realloc was
        // just given.
        memcpy(res + len, args, sizeof(char *) * (n + 1));
        len += n;
    }
    va_end(argp);
    return res;
}
579
581#if __has_warning("-Wdeprecated-declarations") || RBIMPL_COMPILER_IS(GCC)
582COMPILER_WARNING_IGNORED(-Wdeprecated-declarations)
583#endif
584// Start an OS process of absolute executable path with arguments `argv`.
585// Return PID of the process.
586static pid_t
587start_process(const char *abspath, char *const *argv)
588{
589 // Not calling non-async-signal-safe functions between vfork
590 // and execv for safety
591 int dev_null = rb_cloexec_open(ruby_null_device, O_WRONLY, 0);
592 if (dev_null < 0) {
593 verbose(1, "MJIT: Failed to open a null device: %s", strerror(errno));
594 return -1;
595 }
596 if (mjit_opts.verbose >= 2) {
597 const char *arg;
598 fprintf(stderr, "Starting process: %s", abspath);
599 for (int i = 0; (arg = argv[i]) != NULL; i++)
600 fprintf(stderr, " %s", arg);
601 fprintf(stderr, "\n");
602 }
603
604 pid_t pid;
605#ifdef _WIN32
606 extern HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd);
607 int out_fd = 0;
608 if (mjit_opts.verbose <= 1) {
609 // Discard cl.exe's outputs like:
610 // _ruby_mjit_p12u3.c
611 // Creating library C:.../_ruby_mjit_p12u3.lib and object C:.../_ruby_mjit_p12u3.exp
612 out_fd = dev_null;
613 }
614
615 pid = (pid_t)rb_w32_start_process(abspath, argv, out_fd);
616 if (pid == 0) {
617 verbose(1, "MJIT: Failed to create process: %s", dlerror());
618 return -1;
619 }
620#else
621 if ((pid = vfork()) == 0) { /* TODO: reuse some function in process.c */
622 umask(0077);
623 if (mjit_opts.verbose == 0) {
624 // CC can be started in a thread using a file which has been
625 // already removed while MJIT is finishing. Discard the
626 // messages about missing files.
627 dup2(dev_null, STDERR_FILENO);
628 dup2(dev_null, STDOUT_FILENO);
629 }
630 (void)close(dev_null);
631 pid = execv(abspath, argv); // Pid will be negative on an error
632 // Even if we successfully found CC to compile PCH we still can
633 // fail with loading the CC in very rare cases for some reasons.
634 // Stop the forked process in this case.
635 verbose(1, "MJIT: Error in execv: %s", abspath);
636 _exit(1);
637 }
638#endif
639 (void)close(dev_null);
640 return pid;
641}
643
644// Execute an OS process of executable PATH with arguments ARGV.
645// Return -1 or -2 if failed to execute, otherwise exit code of the process.
646// TODO: Use a similar function in process.c
647static int
648exec_process(const char *path, char *const argv[])
649{
650 int stat, exit_code = -2;
651 rb_vm_t *vm = WAITPID_USE_SIGCHLD ? GET_VM() : 0;
653
654 if (vm) {
657 }
658
659 pid_t pid = start_process(path, argv);
660 for (;pid > 0;) {
661 pid_t r = vm ? ruby_waitpid_locked(vm, pid, &stat, 0, &cond)
662 : waitpid(pid, &stat, 0);
663 if (r == -1) {
664 if (errno == EINTR) continue;
665 fprintf(stderr, "[%"PRI_PIDT_PREFIX"d] waitpid(%lu): %s (SIGCHLD=%d,%u)\n",
666 getpid(), (unsigned long)pid, strerror(errno),
668 break;
669 }
670 else if (r == pid) {
671 if (WIFEXITED(stat)) {
672 exit_code = WEXITSTATUS(stat);
673 break;
674 }
675 else if (WIFSIGNALED(stat)) {
676 exit_code = -1;
677 break;
678 }
679 }
680 }
681
682 if (vm) {
685 }
686 return exit_code;
687}
688
// Get rid of the shared object `so_file` produced for `unit`. Elsewhere the
// file is deleted right away; on Windows a loaded DLL cannot be removed, so a
// copy of the path is stored in `unit->so_file` for `clean_temp_files()` to
// delete later.
static void
remove_so_file(const char *so_file, struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    // Windows can't remove files while it's used.
    char *deferred_path = strdup(so_file);

    unit->so_file = deferred_path; // lazily delete on `clean_temp_files()`
    if (deferred_path == NULL) {
        mjit_warning("failed to allocate memory to lazily remove '%s': %s", so_file, strerror(errno));
    }
#else
    remove_file(so_file);
#endif
}
701
702// Print _mjitX, but make a human-readable funcname when --jit-debug is used
703static void
704sprint_funcname(char *funcname, const struct rb_mjit_unit *unit)
705{
706 const rb_iseq_t *iseq = unit->iseq;
707 if (iseq == NULL || (!mjit_opts.debug && !mjit_opts.debug_flags)) {
708 sprintf(funcname, "_mjit%d", unit->id);
709 return;
710 }
711
712 // Generate a short path
713 const char *path = RSTRING_PTR(rb_iseq_path(iseq));
714 const char *lib = "/lib/";
716 while (strstr(path, lib)) // skip "/lib/"
717 path = strstr(path, lib) + strlen(lib);
718 while (strstr(path, version)) // skip "/x.y.z/"
719 path = strstr(path, version) + strlen(version);
720
721 // Annotate all-normalized method names
722 const char *method = RSTRING_PTR(iseq->body->location.label);
723 if (!strcmp(method, "[]")) method = "AREF";
724 if (!strcmp(method, "[]=")) method = "ASET";
725
726 // Print and normalize
727 sprintf(funcname, "_mjit%d_%s_%s", unit->id, path, method);
728 for (size_t i = 0; i < strlen(funcname); i++) {
729 char c = funcname[i];
730 if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_')) {
731 funcname[i] = '_';
732 }
733 }
734}
735
736static const rb_iseq_t **compiling_iseqs = NULL;
737
738static bool
739set_compiling_iseqs(const rb_iseq_t *iseq)
740{
741 compiling_iseqs = calloc(iseq->body->iseq_size + 2, sizeof(rb_iseq_t *)); // 2: 1 (unit->iseq) + 1 (NULL end)
742 if (compiling_iseqs == NULL)
743 return false;
744
745 compiling_iseqs[0] = iseq;
746 int i = 1;
747
748 unsigned int pos = 0;
749 while (pos < iseq->body->iseq_size) {
750#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
751 int insn = rb_vm_insn_addr2insn((void *)iseq->body->iseq_encoded[pos]);
752#else
753 int insn = (int)iseq->body->iseq_encoded[pos];
754#endif
755 if (insn == BIN(opt_send_without_block)) {
756 CALL_DATA cd = (CALL_DATA)iseq->body->iseq_encoded[pos + 1];
757 extern const rb_iseq_t *rb_mjit_inlinable_iseq(const struct rb_callinfo *ci, const struct rb_callcache *cc);
758 const rb_iseq_t *iseq = rb_mjit_inlinable_iseq(cd->ci, cd->cc);
759 if (iseq != NULL) {
760 compiling_iseqs[i] = iseq;
761 i++;
762 }
763 }
764 pos += insn_len(insn);
765 }
766 return true;
767}
768
769bool
771{
772 assert(compiling_iseqs != NULL);
773 int i = 0;
774 while (compiling_iseqs[i]) {
775 if (compiling_iseqs[i] == iseq) return true;
776 i++;
777 }
778 return false;
779}
780
781static const int c_file_access_mode =
782#ifdef O_BINARY
783 O_BINARY|
784#endif
785 O_WRONLY|O_EXCL|O_CREAT;
786
787#define append_str2(p, str, len) ((char *)memcpy((p), str, (len))+(len))
788#define append_str(p, str) append_str2(p, str, sizeof(str)-1)
789#define append_lit(p, str) append_str2(p, str, rb_strlen_lit(str))
790
791#ifdef _MSC_VER
792// Compile C file to so. It returns true if it succeeds. (mswin)
793static bool
794compile_c_to_so(const char *c_file, const char *so_file)
795{
796 const char *files[] = { NULL, NULL, NULL, NULL, NULL, NULL, "-link", libruby_pathflag, NULL };
797 char *p;
798
799 // files[0] = "-Fe*.dll"
800 files[0] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fe") + strlen(so_file) + 1));
801 p = append_lit(p, "-Fe");
802 p = append_str2(p, so_file, strlen(so_file));
803 *p = '\0';
804
805 // files[1] = "-Fo*.obj"
806 // We don't need .obj file, but it's somehow created to cwd without -Fo and we want to control the output directory.
807 files[1] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fo") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".obj") + 1));
808 char *obj_file = p = append_lit(p, "-Fo");
809 p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
810 p = append_lit(p, ".obj");
811 *p = '\0';
812
813 // files[2] = "-Yu*.pch"
814 files[2] = p = alloca(sizeof(char) * (rb_strlen_lit("-Yu") + strlen(pch_file) + 1));
815 p = append_lit(p, "-Yu");
816 p = append_str2(p, pch_file, strlen(pch_file));
817 *p = '\0';
818
819 // files[3] = "C:/.../rb_mjit_header-*.obj"
820 files[3] = p = alloca(sizeof(char) * (strlen(pch_file) + 1));
821 p = append_str2(p, pch_file, strlen(pch_file) - strlen(".pch"));
822 p = append_lit(p, ".obj");
823 *p = '\0';
824
825 // files[4] = "-Tc*.c"
826 files[4] = p = alloca(sizeof(char) * (rb_strlen_lit("-Tc") + strlen(c_file) + 1));
827 p = append_lit(p, "-Tc");
828 p = append_str2(p, c_file, strlen(c_file));
829 *p = '\0';
830
831 // files[5] = "-Fd*.pdb"
832 files[5] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fd") + strlen(pch_file) + 1));
833 p = append_lit(p, "-Fd");
834 p = append_str2(p, pch_file, strlen(pch_file) - rb_strlen_lit(".pch"));
835 p = append_lit(p, ".pdb");
836 *p = '\0';
837
838 char **args = form_args(5, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS,
839 files, CC_LIBS, CC_DLDFLAGS_ARGS);
840 if (args == NULL)
841 return false;
842
843 int exit_code = exec_process(cc_path, args);
844 free(args);
845
846 if (exit_code == 0) {
847 // remove never-used files (.obj, .lib, .exp, .pdb). XXX: Is there any way not to generate this?
848 if (!mjit_opts.save_temps) {
849 char *before_dot;
850 remove_file(obj_file);
851
852 before_dot = obj_file + strlen(obj_file) - rb_strlen_lit(".obj");
853 append_lit(before_dot, ".lib"); remove_file(obj_file);
854 append_lit(before_dot, ".exp"); remove_file(obj_file);
855 append_lit(before_dot, ".pdb"); remove_file(obj_file);
856 }
857 }
858 else {
859 verbose(2, "compile_c_to_so: compile error: %d", exit_code);
860 }
861 return exit_code == 0;
862}
863#else // _MSC_VER
864
865// The function producing the pre-compiled header.
866static void
867make_pch(void)
868{
869 const char *rest_args[] = {
870# ifdef __clang__
871 "-emit-pch",
872 "-c",
873# endif
874 // -nodefaultlibs is a linker flag, but it may affect cc1 behavior on Gentoo, which should NOT be changed on pch:
875 // https://gitweb.gentoo.org/proj/gcc-patches.git/tree/7.3.0/gentoo/13_all_default-ssp-fix.patch
877 "-o", pch_file, header_file,
878 NULL,
879 };
880
881 verbose(2, "Creating precompiled header");
882 char **args = form_args(4, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, rest_args);
883 if (args == NULL) {
884 mjit_warning("making precompiled header failed on forming args");
885 CRITICAL_SECTION_START(3, "in make_pch");
886 pch_status = PCH_FAILED;
887 CRITICAL_SECTION_FINISH(3, "in make_pch");
888 return;
889 }
890
891 int exit_code = exec_process(cc_path, args);
892 free(args);
893
894 CRITICAL_SECTION_START(3, "in make_pch");
895 if (exit_code == 0) {
896 pch_status = PCH_SUCCESS;
897 }
898 else {
899 mjit_warning("Making precompiled header failed on compilation. Stopping MJIT worker...");
900 pch_status = PCH_FAILED;
901 }
902 /* wakeup `mjit_finish` */
903 rb_native_cond_broadcast(&mjit_pch_wakeup);
904 CRITICAL_SECTION_FINISH(3, "in make_pch");
905}
906
907// Compile .c file to .so file. It returns true if it succeeds. (non-mswin)
908// Not compiling .c to .so directly because it fails on MinGW, and this helps
909// to generate no .dSYM on macOS.
910static bool
911compile_c_to_so(const char *c_file, const char *so_file)
912{
913 char* o_file = alloca(strlen(c_file) + 1);
914 strcpy(o_file, c_file);
915 o_file[strlen(c_file) - 1] = 'o';
916
917 const char *o_args[] = {
918 "-o", o_file, c_file,
919# ifdef __clang__
920 "-include-pch", pch_file,
921# endif
922 "-c", NULL
923 };
924 char **args = form_args(5, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, o_args, CC_LINKER_ARGS);
925 if (args == NULL) return false;
926 int exit_code = exec_process(cc_path, args);
927 free(args);
928 if (exit_code != 0) {
929 verbose(2, "compile_c_to_so: failed to compile .c to .o: %d", exit_code);
930 return false;
931 }
932
933 const char *so_args[] = {
934 "-o", so_file,
935# ifdef _WIN32
936 libruby_pathflag,
937# endif
938 o_file, NULL
939 };
940# if defined(__MACH__)
941 extern VALUE rb_libruby_selfpath;
942 const char *loader_args[] = {"-bundle_loader", StringValuePtr(rb_libruby_selfpath), NULL};
943# else
944 const char *loader_args[] = {NULL};
945# endif
946 args = form_args(7, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, so_args, loader_args, CC_LIBS, CC_DLDFLAGS_ARGS, CC_LINKER_ARGS);
947 if (args == NULL) return false;
948 exit_code = exec_process(cc_path, args);
949 free(args);
950 if (!mjit_opts.save_temps) remove_file(o_file);
951 if (exit_code != 0) {
952 verbose(2, "compile_c_to_so: failed to link .o to .so: %d", exit_code);
953 }
954 return exit_code == 0;
955}
956#endif // _MSC_VER
957
958#if USE_JIT_COMPACTION
959static void compile_prelude(FILE *f);
960
961static bool
962compile_compact_jit_code(char* c_file)
963{
964 FILE *f;
965 int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
966 if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
967 int e = errno;
968 if (fd >= 0) (void)close(fd);
969 verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
970 return false;
971 }
972
973 compile_prelude(f);
974
975 // wait until mjit_gc_exit_hook is called
976 CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
977 while (in_gc) {
978 verbose(3, "Waiting wakeup from GC");
979 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
980 }
981 // We need to check again here because we could've waited on GC above
982 bool iseq_gced = false;
983 struct rb_mjit_unit *child_unit = 0, *next;
984 list_for_each_safe(&active_units.head, child_unit, next, unode) {
985 if (child_unit->iseq == NULL) { // ISeq is GC-ed
986 iseq_gced = true;
987 verbose(1, "JIT compaction: A method for JIT code u%d is obsoleted. Compaction will be skipped.", child_unit->id);
988 remove_from_list(child_unit, &active_units);
989 free_unit(child_unit); // unload it without waiting for throttled unload_units to retry compaction quickly
990 }
991 }
992 in_jit = !iseq_gced;
993 CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
994 if (!in_jit) {
995 fclose(f);
996 if (!mjit_opts.save_temps)
997 remove_file(c_file);
998 return false;
999 }
1000
1001 // This entire loop lock GC so that we do not need to consider a case that
1002 // ISeq is GC-ed in a middle of re-compilation. It takes 3~4ms with 100 methods
1003 // on my machine. It's not too bad compared to compilation time of C (7200~8000ms),
1004 // but it might be larger if we use a larger --jit-max-cache.
1005 //
1006 // TODO: Consider using a more granular lock after we implement inlining across
1007 // compacted functions (not done yet).
1008 bool success = true;
1009 list_for_each(&active_units.head, child_unit, unode) {
1010 CRITICAL_SECTION_START(3, "before set_compiling_iseqs");
1011 success &= set_compiling_iseqs(child_unit->iseq);
1012 CRITICAL_SECTION_FINISH(3, "after set_compiling_iseqs");
1013 if (!success) continue;
1014
1015 char funcname[MAXPATHLEN];
1016 sprint_funcname(funcname, child_unit);
1017
1018 long iseq_lineno = 0;
1019 if (FIXNUM_P(child_unit->iseq->body->location.first_lineno))
1020 // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1021 iseq_lineno = FIX2LONG(child_unit->iseq->body->location.first_lineno);
1022 const char *sep = "@";
1023 const char *iseq_label = RSTRING_PTR(child_unit->iseq->body->location.label);
1024 const char *iseq_path = RSTRING_PTR(rb_iseq_path(child_unit->iseq));
1025 if (!iseq_label) iseq_label = sep = "";
1026 fprintf(f, "\n/* %s%s%s:%ld */\n", iseq_label, sep, iseq_path, iseq_lineno);
1027 success &= mjit_compile(f, child_unit->iseq, funcname, child_unit->id);
1028
1029 CRITICAL_SECTION_START(3, "before compiling_iseqs free");
1030 free(compiling_iseqs);
1031 compiling_iseqs = NULL;
1032 CRITICAL_SECTION_FINISH(3, "after compiling_iseqs free");
1033 }
1034
1035 // release blocking mjit_gc_start_hook
1036 CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1037 in_jit = false;
1038 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1039 rb_native_cond_signal(&mjit_client_wakeup);
1040 CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1041
1042 fclose(f);
1043 return success;
1044}
1045
1046// Compile all cached .c files and build a single .so file. Reload all JIT func from it.
1047// This improves the code locality for better performance in terms of iTLB and iCache.
1048static void
1049compact_all_jit_code(void)
1050{
1051 struct rb_mjit_unit *unit, *cur = 0;
1052 static const char c_ext[] = ".c";
1053 static const char so_ext[] = DLEXT;
1054 char c_file[MAXPATHLEN], so_file[MAXPATHLEN];
1055
1056 // Abnormal use case of rb_mjit_unit that doesn't have ISeq
1057 unit = calloc(1, sizeof(struct rb_mjit_unit)); // To prevent GC, don't use ZALLOC
1058 if (unit == NULL) return;
1059 unit->id = current_unit_num++;
1060 sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1061 sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1062
1063 bool success = compile_compact_jit_code(c_file);
1064 double start_time = real_ms_time();
1065 if (success) {
1066 success = compile_c_to_so(c_file, so_file);
1067 if (!mjit_opts.save_temps)
1068 remove_file(c_file);
1069 }
1070 double end_time = real_ms_time();
1071
1072 if (success) {
1073 void *handle = dlopen(so_file, RTLD_NOW);
1074 if (handle == NULL) {
1075 mjit_warning("failure in loading code from compacted '%s': %s", so_file, dlerror());
1076 free(unit);
1077 return;
1078 }
1079 unit->handle = handle;
1080
1081 // lazily dlclose handle (and .so file for win32) on `mjit_finish()`.
1082 add_to_list(unit, &compact_units);
1083
1084 if (!mjit_opts.save_temps)
1085 remove_so_file(so_file, unit);
1086
1087 CRITICAL_SECTION_START(3, "in compact_all_jit_code to read list");
1088 list_for_each(&active_units.head, cur, unode) {
1089 void *func;
1090 char funcname[MAXPATHLEN];
1091 sprint_funcname(funcname, cur);
1092
1093 if ((func = dlsym(handle, funcname)) == NULL) {
1094 mjit_warning("skipping to reload '%s' from '%s': %s", funcname, so_file, dlerror());
1095 continue;
1096 }
1097
1098 if (cur->iseq) { // Check whether GCed or not
1099 // Usage of jit_code might be not in a critical section.
1100 MJIT_ATOMIC_SET(cur->iseq->body->jit_func, (mjit_func_t)func);
1101 }
1102 }
1103 CRITICAL_SECTION_FINISH(3, "in compact_all_jit_code to read list");
1104 verbose(1, "JIT compaction (%.1fms): Compacted %d methods %s -> %s", end_time - start_time, active_units.length, c_file, so_file);
1105 }
1106 else {
1107 free(unit);
1108 verbose(1, "JIT compaction failure (%.1fms): Failed to compact methods", end_time - start_time);
1109 }
1110}
1111#endif // USE_JIT_COMPACTION
1112
1113static void *
1114load_func_from_so(const char *so_file, const char *funcname, struct rb_mjit_unit *unit)
1115{
1116 void *handle, *func;
1117
1118 handle = dlopen(so_file, RTLD_NOW);
1119 if (handle == NULL) {
1120 mjit_warning("failure in loading code from '%s': %s", so_file, dlerror());
1121 return (void *)NOT_ADDED_JIT_ISEQ_FUNC;
1122 }
1123
1124 func = dlsym(handle, funcname);
1125 unit->handle = handle;
1126 return func;
1127}
1128
1129#ifndef __clang__
// Return the position in `s` where the header name to be #include-d ends.
// It is the terminating NUL of `s`, except that with __GNUC__ a trailing
// ".gch" is excluded when something precedes it.
static const char *
header_name_end(const char *s)
{
    const char *tail = s + strlen(s);
# ifdef __GNUC__ // don't chomp .pch for mswin
    static const char gch_ext[] = ".gch";
    const size_t ext_len = sizeof(gch_ext) - 1;

    // chomp .gch suffix
    if (tail > s + ext_len && strcmp(tail - ext_len, gch_ext) == 0) {
        return tail - ext_len;
    }
# endif
    return tail;
}
1144#endif
1145
1146// Print platform-specific prerequisites in generated code.
1147static void
1148compile_prelude(FILE *f)
1149{
1150#ifndef __clang__ // -include-pch is used for Clang
1151 const char *s = pch_file;
1152 const char *e = header_name_end(s);
1153
1154 fprintf(f, "#include \"");
1155 // print pch_file except .gch for gcc, but keep .pch for mswin
1156 for (; s < e; s++) {
1157 switch(*s) {
1158 case '\\': case '"':
1159 fputc('\\', f);
1160 }
1161 fputc(*s, f);
1162 }
1163 fprintf(f, "\"\n");
1164#endif
1165
1166#ifdef _WIN32
1167 fprintf(f, "void _pei386_runtime_relocator(void){}\n");
1168 fprintf(f, "int __stdcall DllMainCRTStartup(void* hinstDLL, unsigned int fdwReason, void* lpvReserved) { return 1; }\n");
1169#endif
1170}
1171
1172// Compile ISeq in UNIT and return function pointer of JIT-ed code.
1173// It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong.
1174static mjit_func_t
1175convert_unit_to_func(struct rb_mjit_unit *unit)
1176{
1177 static const char c_ext[] = ".c";
1178 static const char so_ext[] = DLEXT;
1179 char c_file[MAXPATHLEN], so_file[MAXPATHLEN], funcname[MAXPATHLEN];
1180
1181 sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1182 sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1183 sprint_funcname(funcname, unit);
1184
1185 FILE *f;
1186 int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
1187 if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
1188 int e = errno;
1189 if (fd >= 0) (void)close(fd);
1190 verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
1191 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1192 }
1193
1194 // print #include of MJIT header, etc.
1195 compile_prelude(f);
1196
1197 // wait until mjit_gc_exit_hook is called
1198 CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
1199 while (in_gc) {
1200 verbose(3, "Waiting wakeup from GC");
1201 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1202 }
1203 // We need to check again here because we could've waited on GC above
1204 in_jit = (unit->iseq != NULL);
1205 if (in_jit)
1206 in_jit &= set_compiling_iseqs(unit->iseq);
1207 CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
1208 if (!in_jit) {
1209 fclose(f);
1210 if (!mjit_opts.save_temps)
1211 remove_file(c_file);
1212 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1213 }
1214
1215 // To make MJIT worker thread-safe against GC.compact, copy ISeq values while `in_jit` is true.
1216 long iseq_lineno = 0;
1217 if (FIXNUM_P(unit->iseq->body->location.first_lineno))
1218 // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1219 iseq_lineno = FIX2LONG(unit->iseq->body->location.first_lineno);
1220 char *iseq_label = alloca(RSTRING_LEN(unit->iseq->body->location.label) + 1);
1221 char *iseq_path = alloca(RSTRING_LEN(rb_iseq_path(unit->iseq)) + 1);
1222 strcpy(iseq_label, RSTRING_PTR(unit->iseq->body->location.label));
1223 strcpy(iseq_path, RSTRING_PTR(rb_iseq_path(unit->iseq)));
1224
1225 verbose(2, "start compilation: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1226 fprintf(f, "/* %s@%s:%ld */\n\n", iseq_label, iseq_path, iseq_lineno);
1227 bool success = mjit_compile(f, unit->iseq, funcname, unit->id);
1228
1229 // release blocking mjit_gc_start_hook
1230 CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1231 free(compiling_iseqs);
1232 compiling_iseqs = NULL;
1233 in_jit = false;
1234 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1235 rb_native_cond_signal(&mjit_client_wakeup);
1236 CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1237
1238 fclose(f);
1239 if (!success) {
1240 if (!mjit_opts.save_temps)
1241 remove_file(c_file);
1242 verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1243 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1244 }
1245
1246 double start_time = real_ms_time();
1247 success = compile_c_to_so(c_file, so_file);
1248 if (!mjit_opts.save_temps)
1249 remove_file(c_file);
1250 double end_time = real_ms_time();
1251
1252 if (!success) {
1253 verbose(2, "Failed to generate so: %s", so_file);
1254 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1255 }
1256
1257 void *func = load_func_from_so(so_file, funcname, unit);
1258 if (!mjit_opts.save_temps)
1259 remove_so_file(so_file, unit);
1260
1261 if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1262 verbose(1, "JIT success (%.1fms): %s@%s:%ld -> %s",
1263 end_time - start_time, iseq_label, iseq_path, iseq_lineno, c_file);
1264 }
1265 return (mjit_func_t)func;
1266}
1267
1268// To see cc_entries using index returned by `mjit_capture_cc_entries` in mjit_compile.c
1269const struct rb_callcache **
1271{
1272 return body->jit_unit->cc_entries;
1273}
1274
1275// Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`.
1276// This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc.
1277//
1278// Index to refer to `compiled_iseq->jit_unit->cc_entries` is returned instead of the address
1279// because old addresses may be invalidated by `realloc` later. -1 is returned on failure.
1280//
1281// This assumes that it's safe to reference cc without acquiring GVL.
1282int
1283mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq)
1284{
1285 struct rb_mjit_unit *unit = compiled_iseq->jit_unit;
1286 unsigned int new_entries_size = unit->cc_entries_size + captured_iseq->ci_size;
1287 VM_ASSERT(captured_iseq->ci_size > 0);
1288
1289 // Allocate new cc_entries and append them to unit->cc_entries
1290 const struct rb_callcache **cc_entries;
1291 int cc_entries_index = unit->cc_entries_size;
1292 if (unit->cc_entries_size == 0) {
1293 VM_ASSERT(unit->cc_entries == NULL);
1294 unit->cc_entries = cc_entries = malloc(sizeof(struct rb_callcache *) * new_entries_size);
1295 if (cc_entries == NULL) return -1;
1296 }
1297 else {
1298 void *cc_ptr = (void *)unit->cc_entries; // get rid of bogus warning by VC
1299 cc_entries = realloc(cc_ptr, sizeof(struct rb_callcache *) * new_entries_size);
1300 if (cc_entries == NULL) return -1;
1301 unit->cc_entries = cc_entries;
1302 cc_entries += cc_entries_index;
1303 }
1304 unit->cc_entries_size = new_entries_size;
1305
1306 // Capture cc to cc_enties
1307 for (unsigned int i = 0; i < captured_iseq->ci_size; i++) {
1308 cc_entries[i] = captured_iseq->call_data[i].cc;
1309 }
1310
1311 return cc_entries_index;
1312}
1313
1314// Set up field `used_code_p` for unit iseqs whose iseq on the stack of ec.
1315static void
1316mark_ec_units(rb_execution_context_t *ec)
1317{
1318 const rb_control_frame_t *cfp;
1319
1320 if (ec->vm_stack == NULL)
1321 return;
1322 for (cfp = RUBY_VM_END_CONTROL_FRAME(ec) - 1; ; cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
1323 const rb_iseq_t *iseq;
1324 if (cfp->pc && (iseq = cfp->iseq) != NULL
1325 && imemo_type((VALUE) iseq) == imemo_iseq
1326 && (iseq->body->jit_unit) != NULL) {
1327 iseq->body->jit_unit->used_code_p = true;
1328 }
1329
1330 if (cfp == ec->cfp)
1331 break; // reached the most recent cfp
1332 }
1333}
1334
1335// MJIT info related to an existing continutaion.
1337 rb_execution_context_t *ec; // continuation ec
1338 struct mjit_cont *prev, *next; // used to form lists
1339};
1340
1341// Double linked list of registered continuations. This is used to detect
1342// units which are in use in unload_units.
1343static struct mjit_cont *first_cont;
1344
1345// Unload JIT code of some units to satisfy the maximum permitted
1346// number of units with a loaded code.
1347static void
1348unload_units(void)
1349{
1350 struct rb_mjit_unit *unit = 0, *next;
1351 struct mjit_cont *cont;
1352 int units_num = active_units.length;
1353
1354 // For now, we don't unload units when ISeq is GCed. We should
1355 // unload such ISeqs first here.
1356 list_for_each_safe(&active_units.head, unit, next, unode) {
1357 if (unit->iseq == NULL) { // ISeq is GCed.
1358 remove_from_list(unit, &active_units);
1359 free_unit(unit);
1360 }
1361 }
1362
1363 // Detect units which are in use and can't be unloaded.
1364 list_for_each(&active_units.head, unit, unode) {
1365 assert(unit->iseq != NULL && unit->handle != NULL);
1366 unit->used_code_p = false;
1367 }
1368 // All threads have a root_fiber which has a mjit_cont. Other normal fibers also
1369 // have a mjit_cont. Thus we can check ISeqs in use by scanning ec of mjit_conts.
1370 for (cont = first_cont; cont != NULL; cont = cont->next) {
1371 mark_ec_units(cont->ec);
1372 }
1373 // TODO: check stale_units and unload unused ones! (note that the unit is not associated to ISeq anymore)
1374
1375 // Unload units whose total_calls is smaller than any total_calls in unit_queue.
1376 // TODO: make the algorithm more efficient
1377 long unsigned prev_queue_calls = -1;
1378 while (true) {
1379 // Calculate the next max total_calls in unit_queue
1380 long unsigned max_queue_calls = 0;
1381 list_for_each(&unit_queue.head, unit, unode) {
1382 if (unit->iseq != NULL && max_queue_calls < unit->iseq->body->total_calls
1383 && unit->iseq->body->total_calls < prev_queue_calls) {
1384 max_queue_calls = unit->iseq->body->total_calls;
1385 }
1386 }
1387 prev_queue_calls = max_queue_calls;
1388
1389 bool unloaded_p = false;
1390 list_for_each_safe(&active_units.head, unit, next, unode) {
1391 if (unit->used_code_p) // We can't unload code on stack.
1392 continue;
1393
1394 if (max_queue_calls > unit->iseq->body->total_calls) {
1395 verbose(2, "Unloading unit %d (calls=%lu, threshold=%lu)",
1396 unit->id, unit->iseq->body->total_calls, max_queue_calls);
1397 assert(unit->handle != NULL);
1398 remove_from_list(unit, &active_units);
1399 free_unit(unit);
1400 unloaded_p = true;
1401 }
1402 }
1403 if (!unloaded_p) break;
1404 }
1405
1406 if (units_num > active_units.length) {
1407 verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length);
1408 total_unloads += units_num - active_units.length;
1409 }
1410}
1411
1412static void mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info, bool worker_p);
1413
1414// The function implementing a worker. It is executed in a separate
1415// thread by rb_thread_create_mjit_thread. It compiles precompiled header
1416// and then compiles requested ISeqs.
1417void
1419{
1420 // Allow only `max_cache_size / 10` times (default: 10) of compaction.
1421 // Note: GC of compacted code has not been implemented yet.
1422 int max_compact_size = mjit_opts.max_cache_size / 10;
1423 if (max_compact_size < 10) max_compact_size = 10;
1424
1425 // Run unload_units after it's requested `max_cache_size / 10` (default: 10) times.
1426 // This throttles the call to mitigate locking in unload_units. It also throttles JIT compaction.
1427 int throttle_threshold = mjit_opts.max_cache_size / 10;
1428
1429#ifndef _MSC_VER
1430 if (pch_status == PCH_NOT_READY) {
1431 make_pch();
1432 }
1433#endif
1434 if (pch_status == PCH_FAILED) {
1435 mjit_enabled = false;
1436 CRITICAL_SECTION_START(3, "in worker to update worker_stopped");
1437 worker_stopped = true;
1438 verbose(3, "Sending wakeup signal to client in a mjit-worker");
1439 rb_native_cond_signal(&mjit_client_wakeup);
1440 CRITICAL_SECTION_FINISH(3, "in worker to update worker_stopped");
1441 return; // TODO: do the same thing in the latter half of mjit_finish
1442 }
1443
1444 // main worker loop
1445 while (!stop_worker_p) {
1446 struct rb_mjit_unit *unit;
1447
1448 // Wait until a unit becomes available
1449 CRITICAL_SECTION_START(3, "in worker dequeue");
1450 while ((list_empty(&unit_queue.head) || active_units.length >= mjit_opts.max_cache_size) && !stop_worker_p) {
1451 rb_native_cond_wait(&mjit_worker_wakeup, &mjit_engine_mutex);
1452 verbose(3, "Getting wakeup from client");
1453
1454 // Lazily move active_units to stale_units to avoid race conditions around active_units with compaction
1455 if (pending_stale_p) {
1456 pending_stale_p = false;
1457 struct rb_mjit_unit *next;
1458 list_for_each_safe(&active_units.head, unit, next, unode) {
1459 if (unit->stale_p) {
1460 unit->stale_p = false;
1461 remove_from_list(unit, &active_units);
1462 add_to_list(unit, &stale_units);
1463 // Lazily put it to unit_queue as well to avoid race conditions on jit_unit with mjit_compile.
1464 mjit_add_iseq_to_process(unit->iseq, &unit->iseq->body->jit_unit->compile_info, true);
1465 }
1466 }
1467 }
1468
1469 // Unload some units as needed
1470 if (unload_requests >= throttle_threshold) {
1471 while (in_gc) {
1472 verbose(3, "Waiting wakeup from GC");
1473 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1474 }
1475 in_jit = true; // Lock GC
1476
1477 RB_DEBUG_COUNTER_INC(mjit_unload_units);
1478 unload_units();
1479 unload_requests = 0;
1480
1481 in_jit = false; // Unlock GC
1482 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1483 rb_native_cond_signal(&mjit_client_wakeup);
1484 }
1485 if (active_units.length == mjit_opts.max_cache_size && mjit_opts.wait) { // Sometimes all methods may be in use
1486 mjit_opts.max_cache_size++; // avoid infinite loop on `rb_mjit_wait_call`. Note that --jit-wait is just for testing.
1487 verbose(1, "No units can be unloaded -- incremented max-cache-size to %d for --jit-wait", mjit_opts.max_cache_size);
1488 }
1489 }
1490 unit = get_from_list(&unit_queue);
1491 CRITICAL_SECTION_FINISH(3, "in worker dequeue");
1492
1493 if (unit) {
1494 // JIT compile
1495 mjit_func_t func = convert_unit_to_func(unit);
1496 (void)RB_DEBUG_COUNTER_INC_IF(mjit_compile_failures, func == (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC);
1497
1498 CRITICAL_SECTION_START(3, "in jit func replace");
1499 while (in_gc) { // Make sure we're not GC-ing when touching ISeq
1500 verbose(3, "Waiting wakeup from GC");
1501 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1502 }
1503 if (unit->iseq) { // Check whether GCed or not
1504 if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1505 add_to_list(unit, &active_units);
1506 }
1507 // Usage of jit_code might be not in a critical section.
1508 MJIT_ATOMIC_SET(unit->iseq->body->jit_func, func);
1509 }
1510 else {
1511 free_unit(unit);
1512 }
1513 CRITICAL_SECTION_FINISH(3, "in jit func replace");
1514
1515#if USE_JIT_COMPACTION
1516 // Combine .o files to one .so and reload all jit_func to improve memory locality.
1517 if (compact_units.length < max_compact_size
1518 && ((!mjit_opts.wait && unit_queue.length == 0 && active_units.length > 1)
1519 || (active_units.length == mjit_opts.max_cache_size && compact_units.length * throttle_threshold <= total_unloads))) { // throttle compaction by total_unloads
1520 compact_all_jit_code();
1521 }
1522#endif
1523 }
1524 }
1525
1526 // To keep mutex unlocked when it is destroyed by mjit_finish, don't wrap CRITICAL_SECTION here.
1527 worker_stopped = true;
1528}
#define PRINTF_ARGS(decl, string_index, first_to_check)
Definition: attributes.h:112
Internal header for the compiler.
int rb_vm_insn_addr2insn(const void *)
Definition: iseq.c:3172
#define RB_DEBUG_COUNTER_INC_IF(type, cond)
#define RB_DEBUG_COUNTER_INC(type)
#define assert(x)
Definition: dlmalloc.c:1176
#define free(x)
Definition: dln.c:52
big_t * num
Definition: enough.c:232
uint8_t len
Definition: escape.c:17
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
#define RSTRING_LEN(string)
Definition: fbuffer.h:22
#define RSTRING_PTR(string)
Definition: fbuffer.h:19
#define alloca
Definition: ffi_common.h:27
#define memcpy(d, s, n)
Definition: ffi_common.h:55
#define O_BINARY
const char ruby_null_device[]
Definition: file.c:6457
#define WEXITSTATUS(status)
Definition: error.c:60
#define WIFEXITED(status)
Definition: error.c:56
unsigned char suffix[65536]
Definition: gun.c:164
unsigned short prefix[65536]
Definition: gun.c:163
int rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval)
Definition: hash.c:2026
imemo_type
Definition: imemo.h:34
@ imemo_iseq
Definition: imemo.h:42
#define STRINGIZE(expr)
Definition: config.h:82
Defines RBIMPL_HAS_BUILTIN.
int rb_cloexec_open(const char *pathname, int flags, mode_t mode)
Definition: io.c:307
#define rb_strlen_lit(str)
Definition: string.h:286
int dup2(int, int)
Definition: dup2.c:27
char * strstr(const char *, const char *)
Definition: strstr.c:8
char * strerror(int)
Definition: strerror.c:11
#define strdup(s)
Definition: util.h:39
#define RUBY_API_VERSION_TEENY
Definition: version.h:32
#define RUBY_API_VERSION_MAJOR
Definition: version.h:30
#define RUBY_API_VERSION_MINOR
Definition: version.h:31
Internal header for GC.
voidpf void uLong size
Definition: ioapi.h:138
const char * filename
Definition: ioapi.h:137
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
VALUE rb_iseq_path(const rb_iseq_t *iseq)
Definition: iseq.c:1087
#define LONG2FIX
Definition: long.h:49
#define FIX2LONG
Definition: long.h:46
#define MEMCPY(p1, p2, type, n)
Definition: memory.h:129
#define MJIT_CFLAGS
Definition: mjit_config.h:11
#define MJIT_CC_COMMON
Definition: mjit_config.h:10
#define MJIT_LIBS
Definition: mjit_config.h:16
#define MJIT_DLDFLAGS
Definition: mjit_config.h:15
#define MJIT_DEBUGFLAGS
Definition: mjit_config.h:13
#define MJIT_LDSHARED
Definition: mjit_config.h:14
#define MJIT_OPTFLAGS
Definition: mjit_config.h:12
rb_pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options, rb_nativethread_cond_t *cond)
Definition: process.c:1198
void mjit_worker(void)
Definition: mjit_worker.c:1418
int mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq)
Definition: mjit_worker.c:1283
bool mjit_valid_class_serial_p(rb_serial_t class_serial)
Definition: mjit_worker.c:497
void rb_native_mutex_lock(rb_nativethread_lock_t *lock)
void rb_native_cond_initialize(rb_nativethread_cond_t *cond)
#define append_str2(p, str, len)
Definition: mjit_worker.c:787
void rb_native_cond_broadcast(rb_nativethread_cond_t *cond)
struct mjit_options mjit_opts
Definition: mjit_worker.c:199
#define CC_CODEFLAG_ARGS
Definition: mjit_worker.c:329
const struct rb_callcache ** mjit_iseq_cc_entries(const struct rb_iseq_constant_body *const body)
Definition: mjit_worker.c:1270
void rb_native_mutex_initialize(rb_nativethread_lock_t *lock)
void rb_native_mutex_unlock(rb_nativethread_lock_t *lock)
#define GCC_NOSTDLIB_FLAGS
Definition: mjit_worker.c:292
#define GCC_PIC_FLAGS
Definition: mjit_worker.c:283
bool rb_mjit_compiling_iseq_p(const rb_iseq_t *iseq)
Definition: mjit_worker.c:770
bool mjit_call_p
Definition: mjit_worker.c:205
void rb_native_mutex_destroy(rb_nativethread_lock_t *lock)
#define MJIT_TMP_PREFIX
Definition: mjit_worker.c:142
bool mjit_enabled
Definition: mjit_worker.c:202
void rb_native_cond_destroy(rb_nativethread_cond_t *cond)
void rb_native_cond_signal(rb_nativethread_cond_t *cond)
#define MJIT_ATOMIC_SET(var, val)
Definition: mjit_worker.c:140
#define MAXPATHLEN
Definition: mjit_worker.c:122
void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex)
#define append_lit(p, str)
Definition: mjit_worker.c:789
@ PCH_SUCCESS
Definition: mjit_worker.c:263
@ PCH_FAILED
Definition: mjit_worker.c:263
@ PCH_NOT_READY
Definition: mjit_worker.c:263
#define WIFSIGNALED(w)
Definition: process.c:134
#define NULL
Definition: regenc.h:69
#define StringValuePtr(v)
Definition: rstring.h:51
char ** argv
Definition: ruby.c:241
unsigned LONG_LONG rb_serial_t
Definition: serial.h:19
#define FIXNUM_P
#define f
#define realloc
Definition: st.c:172
#define calloc
Definition: st.c:171
#define malloc
Definition: st.c:170
size_t strlen(const char *)
#define const
Definition: strftime.c:108
rb_execution_context_t * ec
Definition: mjit_worker.c:1337
struct mjit_cont * next
Definition: mjit_worker.c:1338
struct mjit_cont * prev
Definition: mjit_worker.c:1338
const struct rb_callcache * cc
Definition: vm_callinfo.h:428
const struct rb_callinfo * ci
Definition: vm_callinfo.h:427
const rb_iseq_t * iseq
Definition: vm_core.h:772
const VALUE * pc
Definition: vm_core.h:770
rb_control_frame_t * cfp
Definition: vm_core.h:858
unsigned int ci_size
Definition: vm_core.h:426
VALUE * iseq_encoded
Definition: vm_core.h:319
unsigned int iseq_size
Definition: vm_core.h:318
rb_iseq_location_t location
Definition: vm_core.h:393
struct rb_call_data * call_data
Definition: vm_core.h:415
struct rb_iseq_constant_body * body
Definition: vm_core.h:448
struct list_head head
Definition: mjit_worker.c:178
const struct rb_callcache ** cc_entries
Definition: mjit_worker.c:172
void * handle
Definition: mjit_worker.c:159
rb_iseq_t * iseq
Definition: mjit_worker.c:160
struct rb_mjit_compile_info compile_info
Definition: mjit_worker.c:170
unsigned int cc_entries_size
Definition: mjit_worker.c:173
bool used_code_p
Definition: mjit_worker.c:166
struct list_node unode
Definition: mjit_worker.c:155
rb_nativethread_lock_t waitpid_lock
Definition: vm_core.h:597
#define snprintf
Definition: subst.h:14
unsigned long VALUE
Definition: value.h:38
#define SIGCHLD_LOSSY
Definition: vm_core.h:128
#define WAITPID_USE_SIGCHLD
Definition: vm_core.h:132
#define RUBY_VM_NEXT_CONTROL_FRAME(cfp)
Definition: vm_core.h:1393
#define VM_ASSERT(expr)
Definition: vm_core.h:61
struct rb_call_data * CALL_DATA
Definition: vm_core.h:1149
#define RUBY_SIGCHLD
Definition: vm_core.h:121
#define COMPILER_WARNING_PUSH
Definition: warnings.h:13
#define COMPILER_WARNING_POP
Definition: warnings.h:14
#define COMPILER_WARNING_IGNORED(flag)
Definition: warnings.h:16
HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd)
Definition: win32.c:1330
int gettimeofday(struct timeval *, struct timezone *)
Definition: win32.c:4654
int intptr_t
Definition: win32.h:90
#define stat
Definition: win32.h:195
rb_pid_t waitpid(rb_pid_t, int *, int)
Definition: win32.c:4532
int clockid_t
Definition: win32.h:132
#define CLOCK_MONOTONIC
Definition: win32.h:134
unsigned int uintptr_t
Definition: win32.h:106
#define CLOCK_REALTIME
Definition: win32.h:133
int clock_gettime(clockid_t, struct timespec *)
Definition: win32.c:4668
if((ID)(DISPID) nameid !=nameid)
Definition: win32ole.c:357
#define fdopen(fd, type)
Definition: zutil.h:181