Skip to content

Commit

Permalink
hash: Treat a waiting list match as a revalidation
Browse files Browse the repository at this point in the history
If a fetch succeeded and an object was inserted as cacheable, it means
that the VCL did not interpret the backend response as revalidated and
shareable with other clients on the waiting list.

After changing the rush to operate based on objcore flags instead of
the call site deciding how many clients to wake up, the same objcore
reference is now passed to requests before they reembark a worker. This
way when an objcore is already present during lookup we can attempt a
cache hit directly on the objcore that triggered the rush, removing a
degree of uncertainty in the waiting list behavior.

Instead of repurposing the req::hash_objhead field into an equivalent
req::hash_objcore, the field is actually removed. In order to signal
that a request comes back from its waiting list, the life time of the
req::waitinglist flag is extended until cnt_lookup() is reentered. But
to avoid infinite rush loops when a return(restart) is involved, the
flag is turned into a req::waitinglist_gen counter to prevent objcore
rushes from affecting requests entering the waiting list after they
began for a given objcore.

If the rushed objcore matches a request, the lookup can result in a
hit without entering the regular lookup critical section. The objhead
lock is briefly acquired to release req's reference on the objhead, to
rely solely on the objcore's objhead reference like a normal hit, and
generally perform hit-related operations. This change brings back the
exponential rush of cacheable objects briefly neutered.

This shifts the infamous waiting list serialization phenomenon to the
vary header match. Since most spurious rushes at every corner of objhead
activity are gone, this change puts all the spurious activity on the
incompatible variants alone instead of all objects on more occasions.

If a cacheable object was inserted in the cache, but already expired,
this behavior enables cache hits. This can be common with multi-tier
Varnish setups where one Varnish server may serve a graced object to
another, but true of any origin server that may serve stale yet valid
responses.

The waiting list enables a proper response-wide no-cache behavior from
now on, but the built-in VCL prevents it by default. This is also the
first step towards implementing no-cache and private support at the
header field granularity.

fgen
  • Loading branch information
dridi committed Oct 14, 2024
1 parent 2828d53 commit cd28daa
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 31 deletions.
5 changes: 2 additions & 3 deletions bin/varnishd/cache/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ struct objcore {
uint16_t oa_present;

unsigned timer_idx; // XXX 4Gobj limit
unsigned waitinglist_gen;
vtim_real last_lru;
VTAILQ_ENTRY(objcore) hsh_list;
VTAILQ_ENTRY(objcore) lru_list;
Expand Down Expand Up @@ -473,6 +474,7 @@ struct req {
stream_close_t doclose;
unsigned restarts;
unsigned esi_level;
unsigned waitinglist_gen;

/* Delivery mode */
unsigned res_mode;
Expand All @@ -499,9 +501,6 @@ struct req {

struct objcore *body_oc;

/* The busy objhead we sleep on */
struct objhead *hash_objhead;

/* Built Vary string == workspace reservation */
uint8_t *vary_b;
uint8_t *vary_e;
Expand Down
100 changes: 81 additions & 19 deletions bin/varnishd/cache/cache_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ hsh_newobjhead(void)
ALLOC_OBJ(oh, OBJHEAD_MAGIC);
XXXAN(oh);
oh->refcnt = 1;
oh->waitinglist_gen = 1;
VTAILQ_INIT(&oh->objcs);
VTAILQ_INIT(&oh->waitinglist);
Lck_New(&oh->mtx, lck_objhdr);
Expand Down Expand Up @@ -355,6 +356,41 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh)
return (oc);
}

/*---------------------------------------------------------------------
*/

static unsigned
hsh_rush_match(struct req *req)
{
struct objhead *oh;
struct objcore *oc;
const uint8_t *vary;

oc = req->objcore;
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

AZ(oc->flags & OC_F_BUSY);
AZ(oc->flags & OC_F_PRIVATE);
if (oc->flags & (OC_F_WITHDRAWN|OC_F_HFM|OC_F_HFP|OC_F_CANCEL|
OC_F_FAILED))
return (0);

if (req->vcf != NULL) /* NB: must operate under oh lock. */
return (0);

oh = oc->objhead;
CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

if (req->hash_ignore_vary)
return (1);
if (!ObjHasAttr(req->wrk, oc, OA_VARY))
return (1);

vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL);
AN(vary);
return (VRY_Match(req, vary));
}

/*---------------------------------------------------------------------
*/

Expand Down Expand Up @@ -383,22 +419,40 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
CHECK_OBJ_ORNULL(req->objcore, OBJCORE_MAGIC);
CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
AN(hash);

hsh_prealloc(wrk);
if (DO_DEBUG(DBG_HASHEDGE))
hsh_testmagic(req->digest);

if (req->hash_objhead != NULL) {
if (req->objcore != NULL && hsh_rush_match(req)) {
TAKE_OBJ_NOTNULL(oc, &req->objcore, OBJCORE_MAGIC);
*ocp = oc;
oh = oc->objhead;
Lck_Lock(&oh->mtx);
oc->hits++;
boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
AN(hsh_deref_objhead_unlock(wrk, &oh, oc));
Req_LogHit(wrk, req, oc, boc_progress);
/* NB: since this hit comes from the waiting list instead of
* a regular lookup, grace is not considered. The object is
* fresh in the context of the waiting list, even expired: it
* was successfully just [re]validated by a fetch task.
*/
return (HSH_HIT);
}

if (req->objcore != NULL) {
/*
* This req came off the waiting list, and brings an
* oh refcnt with it.
* oh refcnt and an incompatible oc refcnt with it,
* the latter acquired during rush hour.
*/
CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
oh = req->hash_objhead;
oh = req->objcore->objhead;
(void)HSH_DerefObjCore(wrk, &req->objcore);
Lck_Lock(&oh->mtx);
req->hash_objhead = NULL;
} else {
AN(wrk->wpriv->nobjhead);
oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead);
Expand Down Expand Up @@ -581,13 +635,14 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
AZ(req->hash_ignore_busy);

/*
* The objhead reference transfers to the sess, we get it
* back when the sess comes off the waiting list and
* calls us again
* The objhead reference is held by req while it is parked on the
* waiting list. The oh pointer is taken back from the objcore that
* triggers a rush of req off the waiting list.
*/
req->hash_objhead = oh;
assert(oh->refcnt > 1);

req->wrk = NULL;
req->waitinglist = 1;
req->waitinglist_gen = oh->waitinglist_gen;

if (DO_DEBUG(DBG_WAITINGLIST))
VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);
Expand Down Expand Up @@ -623,25 +678,32 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r)

AZ(oc->flags & OC_F_BUSY);
AZ(oc->flags & OC_F_PRIVATE);
max = cache_param->rush_exponent;
if (oc->flags & (OC_F_WITHDRAWN|OC_F_FAILED))
max = 1;
else if (oc->flags & (OC_F_HFM|OC_F_HFP|OC_F_CANCEL|OC_F_DYING))
max = cache_param->rush_exponent;
else
max = INT_MAX; /* XXX: temp */
assert(max > 0);

if (oc->waitinglist_gen == 0) {
oc->waitinglist_gen = oh->waitinglist_gen;
oh->waitinglist_gen++;
}

for (i = 0; i < max; i++) {
req = VTAILQ_FIRST(&oh->waitinglist);
CHECK_OBJ_ORNULL(req, REQ_MAGIC);
if (req == NULL)
break;
CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
wrk->stats->busy_wakeup++;
if (req->waitinglist_gen > oc->waitinglist_gen)
continue;

AZ(req->wrk);
VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
req->waitinglist = 0;
req->objcore = oc;
}

oc->refcnt += i;
wrk->stats->busy_wakeup += i;
}

/*---------------------------------------------------------------------
Expand Down Expand Up @@ -902,8 +964,8 @@ HSH_Withdraw(struct worker *wrk, struct objcore **ocp)
assert(oc->refcnt == 1);
assert(oh->refcnt > 0);
oc->flags = OC_F_WITHDRAWN;
hsh_rush1(wrk, oc, &rush);
AZ(HSH_DerefObjCoreUnlock(wrk, &oc));
hsh_rush1(wrk, oc, &rush); /* grabs up to 1 oc ref */
assert(HSH_DerefObjCoreUnlock(wrk, &oc) <= 1);

hsh_rush2(wrk, &rush);
}
Expand Down
1 change: 1 addition & 0 deletions bin/varnishd/cache/cache_objhead.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ struct objhead {
struct lock mtx;
VTAILQ_HEAD(,objcore) objcs;
uint8_t digest[DIGEST_LEN];
unsigned waitinglist_gen;
VTAILQ_HEAD(, req) waitinglist;

/*----------------------------------------------------
Expand Down
15 changes: 9 additions & 6 deletions bin/varnishd/cache/cache_req_fsm.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,20 +557,23 @@ cnt_lookup(struct worker *wrk, struct req *req)
{
struct objcore *oc, *busy;
enum lookup_e lr;
int had_objhead = 0;
int had_objcore = 0;

CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
AZ(req->objcore);
AZ(req->stale_oc);

AN(req->vcl);

VRY_Prep(req);

AZ(req->objcore);
if (req->hash_objhead)
had_objhead = 1;
if (req->waitinglist_gen) {
CHECK_OBJ_NOTNULL(req->objcore, OBJCORE_MAGIC);
req->waitinglist_gen = 0;
had_objcore = 1;
} else
AZ(req->objcore);

wrk->strangelove = 0;
lr = HSH_Lookup(req, &oc, &busy);
if (lr == HSH_BUSY) {
Expand All @@ -586,7 +589,7 @@ cnt_lookup(struct worker *wrk, struct req *req)
if ((unsigned)wrk->strangelove >= cache_param->vary_notice)
VSLb(req->vsl, SLT_Notice, "vsl: High number of variants (%d)",
wrk->strangelove);
if (had_objhead)
if (had_objcore)
VSLb_ts_req(req, "Waitinglist", W_TIM_real(wrk));

if (req->vcf != NULL) {
Expand Down
3 changes: 1 addition & 2 deletions bin/varnishd/cache/cache_vary.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,7 @@ vry_cmp(const uint8_t *v1, const uint8_t *v2)
void
VRY_Prep(struct req *req)
{
if (req->hash_objhead == NULL) {
/* Not a waiting list return */
if (req->waitinglist_gen == 0) {
AZ(req->vary_b);
AZ(req->vary_e);
(void)WS_ReserveAll(req->ws);
Expand Down
Loading

0 comments on commit cd28daa

Please sign in to comment.