Skip to content

Commit

Permalink
Make CALL_ALLOC_AND_ENTER_INIT thread-safe
Browse files Browse the repository at this point in the history
- Modify `get_init_for_simple_managed_python_class` to return both init
  as well as the type version at the time of lookup.
- Modify caching logic to verify that the current version of the type
  matches the version at the time of lookup. This prevents potentially
  caching a stale value if we race with an update to __init__.
- Only cache __init__ functions that are deferred in free-threaded builds.
  This ensures that the borrowed reference to __init__ that is stored in
  the cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT
  passes:
  1. The type version is cleared before the reference in the MRO to __init__
     is destroyed.
  2. If the reference in (1) was the last reference then the __init__ method
     will be queued for deletion the next time GC runs.
  3. GC requires stopping the world, which forces a synchronizes-with operation
     between all threads.
  4. If the GC collects the cached __init__, then type's version will have been
     updated *and* the update will be visible to all threads, so the guard
     cannot pass.
- There are no escaping calls in between loading from the specialization cache
  and pushing the frame. This is a requirement for the default build.
  • Loading branch information
mpage committed Nov 17, 2024
1 parent 003acd5 commit acd0052
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 20 deletions.
2 changes: 2 additions & 0 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,8 @@ extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name,
PyObject **attr);
extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
unsigned int *);
extern int _PyType_CacheInitForSpecialization(PyTypeObject *, PyObject *,
unsigned int);

#ifdef Py_GIL_DISABLED
# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1)
Expand Down
22 changes: 22 additions & 0 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5658,6 +5658,28 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name)
return res;
}


int
_PyType_CacheInitForSpecialization(PyTypeObject *type, PyObject *init,
unsigned int tp_version)
{
if (!init || !tp_version) {
return 0;
}
int can_cache;
BEGIN_TYPE_LOCK();
can_cache = type->tp_version_tag == tp_version;
#ifdef Py_GIL_DISABLED
can_cache = can_cache && _PyObject_HasDeferredRefcount(init);
#endif
if (can_cache) {
PyHeapTypeObject *ht = (PyHeapTypeObject*) type;
FT_ATOMIC_STORE_PTR_RELAXED(ht->_spec_cache.init, init);
}
END_TYPE_LOCK();
return can_cache;
}

static void
set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags)
{
Expand Down
4 changes: 2 additions & 2 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3704,10 +3704,10 @@ dummy_func(
DEOPT_IF(!PyStackRef_IsNull(null[0]));
DEOPT_IF(!PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
DEOPT_IF(tp->tp_version_tag != type_version);
DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
STAT_INC(CALL, hit);
Expand Down
48 changes: 30 additions & 18 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -1921,38 +1921,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD
cache->counter = adaptive_counter_cooldown();
}

/* Returns a borrowed reference.
* The reference is only valid if guarded by a type version check.
*/
static PyFunctionObject *
get_init_for_simple_managed_python_class(PyTypeObject *tp)
/* Returns a strong reference. */
static PyObject *
get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version)
{
assert(tp->tp_new == PyBaseObject_Type.tp_new);
if (tp->tp_alloc != PyType_GenericAlloc) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN);
return NULL;
}
if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) {
unsigned long tp_flags = PyType_GetFlags(tp);
if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES);
return NULL;
}
if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) {
/* Is this possible? */
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR);
return NULL;
}
PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__));
PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version);
if (init == NULL || !PyFunction_Check(init)) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON);
Py_XDECREF(init);
return NULL;
}
int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init));
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE);
Py_DECREF(init);
return NULL;
}
((PyHeapTypeObject *)tp)->_spec_cache.init = init;
return (PyFunctionObject *)init;
return init;
}

static void
Expand Down Expand Up @@ -1985,21 +1985,23 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
if (Py_TYPE(tp) != &PyType_Type) {
goto generic;
}
#ifndef Py_GIL_DISABLED
if (tp->tp_new == PyBaseObject_Type.tp_new) {
PyFunctionObject *init = get_init_for_simple_managed_python_class(tp);
if (type_get_version(tp, CALL) == 0) {
unspecialize(instr, SPEC_FAIL_CALL_NO_TYPE_VERSION);
unsigned int tp_version = 0;
PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version);
if (!tp_version) {
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS);
Py_XDECREF(init);
return;
}
if (init != NULL) {
if (init != NULL && _PyType_CacheInitForSpecialization(tp, init, tp_version)) {
_PyCallCache *cache = (_PyCallCache *)(instr + 1);
write_u32(cache->func_version, tp->tp_version_tag);
write_u32(cache->func_version, tp_version);
specialize(instr, CALL_ALLOC_AND_ENTER_INIT);
Py_DECREF(init);
return;
}
Py_XDECREF(init);
}
#endif
generic:
specialize(instr, CALL_NON_PY_GENERAL);
}
Expand Down Expand Up @@ -2187,7 +2189,7 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
}
}
else {
specialize(instr, CALL_NON_PY_GENERAL, NULL);
specialize(instr, CALL_NON_PY_GENERAL);
}
}

Expand Down Expand Up @@ -2801,6 +2803,13 @@ static const PyBytesObject no_location = {
.ob_sval = { NO_LOC_4 }
};

#ifdef Py_GIL_DISABLED
static _PyCodeArray init_cleanup_tlbc = {
.size = 1,
.entries = {(char*) &_Py_InitCleanup.co_code_adaptive},
};
#endif

const struct _PyCode8 _Py_InitCleanup = {
_PyVarObject_HEAD_INIT(&PyCode_Type, 3),
.co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty),
Expand All @@ -2816,6 +2825,9 @@ const struct _PyCode8 _Py_InitCleanup = {
._co_firsttraceable = 4,
.co_stacksize = 2,
.co_framesize = 2 + FRAME_SPECIALS_SIZE,
#ifdef Py_GIL_DISABLED
.co_tlbc = &init_cleanup_tlbc,
#endif
.co_code_adaptive = {
EXIT_INIT_CHECK, 0,
RETURN_VALUE, 0,
Expand Down

0 comments on commit acd0052

Please sign in to comment.