Skip to content

Commit 45aa5b3

Browse files
committed
[GR-50092] Fix GIL release scheduler
PullRequest: graalpython/3067
2 parents 6a2c75c + feb8a74 commit 45aa5b3

File tree

3 files changed

+75
-43
lines changed

3 files changed

+75
-43
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_thread.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,10 +529,10 @@ def test_gil_fairness(self):
529529
# Test that a thread runing a tight loop without sleeps cannot completely starve other threads
530530
program = dedent("""\
531531
import threading, time
532-
thread_count = 5
532+
thread_count = 5000
533533
done = 0
534534
def target():
535-
time.sleep(0.5)
535+
time.sleep(0.01)
536536
global done
537537
done += 1
538538
threads = []

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/AsyncHandler.java

Lines changed: 66 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
import java.util.concurrent.Executors;
5454
import java.util.concurrent.ScheduledExecutorService;
5555
import java.util.concurrent.TimeUnit;
56-
import java.util.concurrent.atomic.AtomicBoolean;
5756

5857
import org.graalvm.nativeimage.ImageInfo;
5958

@@ -75,7 +74,6 @@
7574
import com.oracle.truffle.api.RootCallTarget;
7675
import com.oracle.truffle.api.ThreadLocalAction;
7776
import com.oracle.truffle.api.TruffleLanguage;
78-
import com.oracle.truffle.api.TruffleLanguage.Env;
7977
import com.oracle.truffle.api.TruffleLogger;
8078
import com.oracle.truffle.api.debug.Debugger;
8179
import com.oracle.truffle.api.frame.VirtualFrame;
@@ -339,51 +337,78 @@ void poll() {
339337
}
340338
}
341339

340+
private static class GilReleaseScheduler implements Runnable {
341+
private final PythonContext ctx;
342+
private volatile boolean gilReleaseRequested;
343+
private Thread lastGilOwner;
344+
345+
private GilReleaseScheduler(PythonContext ctx) {
346+
this.ctx = ctx;
347+
}
348+
349+
@Override
350+
public void run() {
351+
if (!ctx.gilHasQueuedThreads()) {
352+
// Don't release the gil if nobody is waiting for it
353+
return;
354+
}
355+
Thread gilOwner = ctx.getGilOwner();
356+
if (gilOwner != null) {
357+
synchronized (this) {
358+
if (!gilReleaseRequested) {
359+
gilReleaseRequested = true;
360+
/*
361+
* There is a race, but that's no problem. The gil owner may release the gil
362+
* before getting to run this safepoint. In that case, it just ignores it.
363+
* Some other thread will run and eventually get another gil release
364+
* request.
365+
*/
366+
ctx.getEnv().submitThreadLocal(new Thread[]{gilOwner}, new ThreadLocalAction(false, false) {
367+
@Override
368+
protected void perform(ThreadLocalAction.Access access) {
369+
// it may happen that we request a GIL release and no thread is
370+
// currently holding the GIL (e.g. all are sleeping). We still need
371+
// to tick again later, so we reset the gilReleaseRequested flag
372+
// even
373+
// when the thread in question isn't actually holding it.
374+
gilReleaseRequested = false;
375+
RootNode rootNode = access.getLocation().getRootNode();
376+
if (rootNode instanceof PRootNode) {
377+
if (rootNode.isInternal()) {
378+
return;
379+
}
380+
if (((PRootNode) rootNode).isPythonInternal()) {
381+
return;
382+
}
383+
// we only release the gil in ordinary Python code nodes
384+
GilNode gil = GilNode.getUncached();
385+
if (gil.tryRelease()) {
386+
gil.acquire(access.getLocation());
387+
}
388+
}
389+
}
390+
});
391+
} else if (gilOwner != lastGilOwner) {
392+
/*
393+
* If the gil changed owner since the last time we observed it, clear the
394+
* flag to make sure we don't get stuck if the last owner exits before
395+
* executing the safepoint.
396+
*/
397+
gilReleaseRequested = false;
398+
}
399+
lastGilOwner = gilOwner;
400+
}
401+
}
402+
}
403+
}
404+
342405
void activateGIL() {
343406
CompilerAsserts.neverPartOfCompilation();
344407
final PythonContext ctx = context.get();
345408
if (ctx == null) {
346409
return;
347410
}
348-
final Env env = ctx.getEnv();
349-
final AtomicBoolean gilReleaseRequested = new AtomicBoolean(false);
350-
final Runnable gilReleaseRunnable = () -> {
351-
if (gilReleaseRequested.compareAndSet(false, true)) {
352-
Thread gilOwner = ctx.getGilOwner();
353-
// There is a race, but that's no problem. The gil owner may release the gil before
354-
// getting to run this safepoint. In that case, it just ignores it. Some other
355-
// thread will run and eventually get another gil release request.
356-
if (gilOwner != null) {
357-
env.submitThreadLocal(new Thread[]{gilOwner}, new ThreadLocalAction(false, false) {
358-
@Override
359-
protected void perform(ThreadLocalAction.Access access) {
360-
// it may happen that we request a GIL release and no thread is
361-
// currently holding the GIL (e.g. all are sleeping). We still need
362-
// to tick again later, so we reset the gilReleaseRequested flag even
363-
// when the thread in question isn't actually holding it.
364-
gilReleaseRequested.set(false);
365-
RootNode rootNode = access.getLocation().getRootNode();
366-
if (rootNode instanceof PRootNode) {
367-
if (rootNode.isInternal()) {
368-
return;
369-
}
370-
if (((PRootNode) rootNode).isPythonInternal()) {
371-
return;
372-
}
373-
// we only release the gil in ordinary Python code nodes
374-
GilNode gil = GilNode.getUncached();
375-
if (gil.tryRelease()) {
376-
Thread.yield();
377-
gil.acquire(access.getLocation());
378-
}
379-
}
380-
}
381-
});
382-
} else {
383-
gilReleaseRequested.set(false);
384-
}
385-
}
386-
};
411+
final Runnable gilReleaseRunnable = new GilReleaseScheduler(ctx);
387412
if (PythonOptions.AUTOMATIC_ASYNC_ACTIONS) {
388413
executorService.scheduleWithFixedDelay(gilReleaseRunnable, GIL_RELEASE_DELAY, GIL_RELEASE_DELAY, TimeUnit.MILLISECONDS);
389414
} else {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2243,6 +2243,13 @@ Thread getGilOwner() {
22432243
return globalInterpreterLock.getOwner();
22442244
}
22452245

2246+
/**
2247+
* Should not be used outside of {@link AsyncHandler}
2248+
*/
2249+
boolean gilHasQueuedThreads() {
2250+
return globalInterpreterLock.hasQueuedThreads();
2251+
}
2252+
22462253
/**
22472254
* Should not be called directly.
22482255
*

0 commit comments

Comments
 (0)