
Commit 253e208

tool/ex/tests: consistently free ctx, then model
1 parent ec7b932 commit 253e208

6 files changed: +47 -18 lines changed


common/common.cpp

Lines changed: 2 additions & 0 deletions
@@ -1078,6 +1078,8 @@ struct common_init_result::impl {
     impl() = default;
     ~impl() = default;
 
+    // note: the order in which model, context, etc. are declared matters because their destructors will be called bottom-to-top
+
     llama_model_ptr model;
     llama_context_ptr context;
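
The declaration order matters because C++ destroys non-static data members in reverse declaration order: with model declared above context, ~impl() tears down the context while the model is still alive. A minimal sketch of that rule, using hypothetical Model/Context types rather than the real ones:

    // minimal sketch, not from the repo: members are destroyed bottom-to-top,
    // so declaring `model` above `context` guarantees the context dies first
    #include <cstdio>

    struct Model   { ~Model()   { std::puts("model destroyed");   } };
    struct Context { ~Context() { std::puts("context destroyed"); } };

    struct Holder {
        Model   model;    // declared first, destroyed last
        Context context;  // declared last, destroyed first
    };

    int main() {
        Holder h;
        // prints on scope exit:
        //   context destroyed
        //   model destroyed
    }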

src/llama-context.cpp

Lines changed: 16 additions & 17 deletions
@@ -459,23 +459,22 @@ llama_context::llama_context(
 }
 
 llama_context::~llama_context() {
-    // FIXME this currently results in a use-after-free bug if the model is freed before the context
-    // if (!model.hparams.no_alloc) {
-    //     for (size_t i = 0; i < backend_ptrs.size(); ++i) {
-    //         ggml_backend_t backend = backend_ptrs[i];
-    //         ggml_backend_buffer_type_t buft = backend_buft[i];
-
-    //         const size_t size_exp = backend_buf_exp_size[i];
-    //         const size_t size_act = ggml_backend_sched_get_buffer_size(sched.get(), backend);
-    //         if (size_exp == size_act) {
-    //             LLAMA_LOG_DEBUG("%s: %10s compute buffer size is %8.4f MiB, matches expectation of %8.4f MiB\n",
-    //                 __func__, ggml_backend_buft_name(buft), size_act / (1024.0*1024.0), size_exp / (1024.0*1024.0));
-    //         } else {
-    //             LLAMA_LOG_WARN("%s: %10s compute buffer size of %8.4f MiB, does not match expectation of %8.4f MiB\n",
-    //                 __func__, ggml_backend_buft_name(buft), size_act / (1024.0*1024.0), size_exp / (1024.0*1024.0));
-    //         }
-    //     }
-    // }
+    if (!model.hparams.no_alloc) {
+        for (size_t i = 0; i < backend_ptrs.size(); ++i) {
+            ggml_backend_t backend = backend_ptrs[i];
+            ggml_backend_buffer_type_t buft = backend_buft[i];
+
+            const size_t size_exp = backend_buf_exp_size[i];
+            const size_t size_act = ggml_backend_sched_get_buffer_size(sched.get(), backend);
+            if (size_exp == size_act) {
+                LLAMA_LOG_DEBUG("%s: %10s compute buffer size is %8.4f MiB, matches expectation of %8.4f MiB\n",
+                    __func__, ggml_backend_buft_name(buft), size_act / (1024.0*1024.0), size_exp / (1024.0*1024.0));
+            } else {
+                LLAMA_LOG_WARN("%s: %10s compute buffer size of %8.4f MiB, does not match expectation of %8.4f MiB\n",
+                    __func__, ggml_backend_buft_name(buft), size_act / (1024.0*1024.0), size_exp / (1024.0*1024.0));
+            }
+        }
+    }
     ggml_opt_free(opt_ctx);
 }
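
This check can only be re-enabled because it reads model.hparams inside the context destructor, which is safe only while the model still exists. That is exactly what the rest of the commit enforces at every call site: free the context first, then the model. A minimal usage sketch of that order (the model path and default params are placeholders, not taken from the commit):

    // sketch only: the teardown order this commit standardizes on
    llama_model   * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
    llama_context * ctx   = llama_init_from_model(model, llama_context_default_params());

    // ... use ctx ...

    llama_free(ctx);          // context first: ~llama_context may still read from the model
    llama_model_free(model);  // model last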

tests/test-grammar-llguidance.cpp

Lines changed: 3 additions & 0 deletions
@@ -1196,6 +1196,9 @@ int main(int argc, const char ** argv) {
 
     test_sampler_chain();
 
+    llama_free(ctx);
+    llama_model_free(model);
+
     fprintf(stdout, "All tests passed.\n");
     return 0;
 }

tests/test-tokenizer-1-bpe.cpp

Lines changed: 1 addition & 1 deletion
@@ -146,8 +146,8 @@ int main(int argc, char **argv) {
         }
     }
 
-    llama_model_free(model);
     llama_free(ctx);
+    llama_model_free(model);
 
     llama_backend_free();
 

tools/batched-bench/batched-bench.cpp

Lines changed: 11 additions & 0 deletions
@@ -55,6 +55,7 @@ int main(int argc, char ** argv) {
 
     if (ctx == NULL) {
         fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
+        llama_model_free(model);
         return 1;
     }
 
@@ -108,6 +109,8 @@ int main(int argc, char ** argv) {
 
         if (!decode_helper(ctx, batch, ctx_params.n_batch, true)) {
             LOG_ERR("%s: llama_decode() failed\n", __func__);
+            llama_free(ctx);
+            llama_model_free(model);
             return 1;
         }
     }
@@ -147,6 +150,8 @@ int main(int argc, char ** argv) {
 
         if (!decode_helper(ctx, batch, ctx_params.n_batch, false)) {
             LOG_ERR("%s: llama_decode() failed\n", __func__);
+            llama_free(ctx);
+            llama_model_free(model);
            return 1;
        }
 
@@ -165,6 +170,8 @@ int main(int argc, char ** argv) {
             common_batch_add(batch, get_token_rand(), pp + 0, { 0 }, true);
             if (!decode_helper(ctx, batch, ctx_params.n_batch, true)) {
                 LOG_ERR("%s: llama_decode() failed\n", __func__);
+                llama_free(ctx);
+                llama_model_free(model);
                 return 1;
             }
             llama_memory_seq_rm(mem, 0, pp, -1);
@@ -184,6 +191,8 @@ int main(int argc, char ** argv) {
 
             if (!decode_helper(ctx, batch, ctx_params.n_batch, true)) {
                 LOG_ERR("%s: llama_decode() failed\n", __func__);
+                llama_free(ctx);
+                llama_model_free(model);
                 return 1;
             }
         }
@@ -200,6 +209,8 @@ int main(int argc, char ** argv) {
 
             if (!decode_helper(ctx, batch, ctx_params.n_batch, true)) {
                 LOG_ERR("%s: llama_decode() failed\n", __func__);
+                llama_free(ctx);
+                llama_model_free(model);
                 return 1;
             }
         }
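
Not part of this commit, but the repeated manual cleanup on every early return is the pattern the llama_model_ptr / llama_context_ptr wrappers (from llama-cpp.h, already used in common/common.cpp above) exist to avoid. A rough sketch of how such error paths could rely on RAII instead; the load/init calls and the run_bench helper are assumptions for illustration:

    // sketch only: unique_ptr wrappers free ctx, then model, on every return path
    #include "llama-cpp.h"

    static int run_bench(const char * path) {
        llama_model_ptr model(llama_model_load_from_file(path, llama_model_default_params()));
        if (!model) {
            return 1;  // nothing to free yet
        }

        llama_context_ptr ctx(llama_init_from_model(model.get(), llama_context_default_params()));
        if (!ctx) {
            return 1;  // model freed automatically
        }

        // ... decode loops; any early return here frees ctx first, then model,
        // because locals are destroyed in reverse order of declaration ...
        return 0;
    }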

tools/llama-bench/llama-bench.cpp

Lines changed: 14 additions & 0 deletions
@@ -2102,6 +2102,8 @@ int main(int argc, char ** argv) {
         struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
         if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
             fprintf(stderr, "%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
+            llama_free(ctx);
+            llama_model_free(lmodel);
             exit(1);
         }
         tpp.strict_cpu = t.cpu_strict;
@@ -2111,6 +2113,8 @@ int main(int argc, char ** argv) {
         struct ggml_threadpool * threadpool = ggml_threadpool_new_fn(&tpp);
         if (!threadpool) {
             fprintf(stderr, "%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
+            llama_free(ctx);
+            llama_model_free(lmodel);
             exit(1);
         }
 
@@ -2126,6 +2130,8 @@ int main(int argc, char ** argv) {
             bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
             if (!res) {
                 fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__);
+                llama_free(ctx);
+                llama_model_free(lmodel);
                 exit(1);
             }
         }
@@ -2136,6 +2142,8 @@ int main(int argc, char ** argv) {
             bool res = test_gen(ctx, 1, t.n_threads);
             if (!res) {
                 fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__);
+                llama_free(ctx);
+                llama_model_free(lmodel);
                 exit(1);
             }
         }
@@ -2164,6 +2172,8 @@ int main(int argc, char ** argv) {
                 bool res = test_prompt(ctx, t.n_depth, t.n_batch, t.n_threads);
                 if (!res) {
                     fprintf(stderr, "%s: error: failed to run depth\n", __func__);
+                    llama_free(ctx);
+                    llama_model_free(lmodel);
                     exit(1);
                 }
 
@@ -2189,6 +2199,8 @@ int main(int argc, char ** argv) {
                 bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
                 if (!res) {
                     fprintf(stderr, "%s: error: failed to run prompt\n", __func__);
+                    llama_free(ctx);
+                    llama_model_free(lmodel);
                     exit(1);
                 }
             }
@@ -2200,6 +2212,8 @@ int main(int argc, char ** argv) {
                 bool res = test_gen(ctx, t.n_gen, t.n_threads);
                 if (!res) {
                     fprintf(stderr, "%s: error: failed to run gen\n", __func__);
+                    llama_free(ctx);
+                    llama_model_free(lmodel);
                     exit(1);
                 }
             }

0 commit comments
