Commit 38a07c4

noobie-bob and yousshim authored

Remove the custom prompt formatting and rely on Ollama ex3ndr#67 (#10)

* feat: delegate the prompt formatting to ollama
* refactor: add missing semicolon
* Remove prompt cache
* Remove auto model downloads
* Remove block processing
* Use ollama without streaming

Co-authored-by: Yousshim <yousef98ibrahim@gmail.com>
1 parent d35824b commit 38a07c4
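In effect, the extension stops building model-specific fill-in-the-middle prompts client-side (the deleted prompt-format logic selected between stable-code, codellama, and deepseek templates) and instead hands the prefix and suffix straight to Ollama in a single non-streaming request. A minimal sketch of the new request body; only the field names come from the diff below, the values are illustrative assumptions:

// Illustrative values — not taken from the diff:
const prefix = 'function add(a: number, b: number) {\n    return ';
const suffix = ';\n}';

// Shape of the body now POSTed to {endpoint}/api/generate:
const body = {
    model: 'stable-code:3b-code-q4_0', // any Ollama model tag
    prompt: prefix,                    // text before the cursor
    suffix: suffix,                    // text after the cursor — prompt formatting is now Ollama's job
    raw: true,
    stream: false,                     // one JSON object back instead of a token stream
    options: { num_predict: 256, temperature: 0.2 },
};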

File tree

11 files changed: +83, -386 lines


package.json

Lines changed: 0 additions & 48 deletions
@@ -83,37 +83,6 @@
         },
         "inference.model": {
           "type": "string",
-          "enum": [
-            "stable-code:3b-code-q4_0",
-            "codellama:7b-code-q4_K_S",
-            "codellama:7b-code-q4_K_M",
-            "codellama:7b-code-q6_K",
-            "codellama:7b-code-fp16",
-            "codellama:13b-code-q4_K_S",
-            "codellama:13b-code-q4_K_M",
-            "codellama:13b-code-q6_K",
-            "codellama:13b-code-fp16",
-            "codellama:34b-code-q4_K_S",
-            "codellama:34b-code-q4_K_M",
-            "codellama:34b-code-q6_K",
-            "codellama:70b-code-q4_K_S",
-            "codellama:70b-code-q4_K_M",
-            "codellama:70b-code-q6_K",
-            "codellama:70b-code-fp16",
-            "deepseek-coder:1.3b-base-q4_0",
-            "deepseek-coder:1.3b-base-q4_1",
-            "deepseek-coder:1.3b-base-q8_0",
-            "deepseek-coder:6.7b-base-q4_K_S",
-            "deepseek-coder:6.7b-base-q4_K_M",
-            "deepseek-coder:6.7b-base-q5_K_S",
-            "deepseek-coder:6.7b-base-q5_K_M",
-            "deepseek-coder:6.7b-base-q8_0",
-            "deepseek-coder:6.7b-base-fp16",
-            "deepseek-coder:33b-base-q4_K_S",
-            "deepseek-coder:33b-base-q4_K_M",
-            "deepseek-coder:33b-base-fp16",
-            "custom"
-          ],
           "default": "stable-code:3b-code-q4_0",
           "description": "Inference model to use",
           "order": 2
@@ -124,23 +93,6 @@
           "description": "Temperature of the model. Increasing the temperature will make the model answer more creatively.",
           "order": 3
         },
-        "inference.custom.model": {
-          "type": "string",
-          "default": "",
-          "description": "Custom model name",
-          "order": 4
-        },
-        "inference.custom.format": {
-          "type": "string",
-          "enum": [
-            "stable-code",
-            "codellama",
-            "deepseek"
-          ],
-          "default": "stable-code",
-          "description": "Custom model prompt format",
-          "order": 5
-        },
         "inference.maxLines": {
           "type": "number",
           "default": 16,

src/config.ts

Lines changed: 0 additions & 13 deletions
@@ -1,5 +1,4 @@
 import vscode from 'vscode';
-import { ModelFormat } from './prompts/processors/models';
 
 class Config {
 
@@ -24,17 +23,6 @@ class Config {
 
         // Load model
         let modelName = config.get('model') as string;
-        let modelFormat: ModelFormat = 'codellama';
-        if (modelName === 'custom') {
-            modelName = config.get('custom.model') as string;
-            modelFormat = config.get('cutom.format') as ModelFormat;
-        } else {
-            if (modelName.startsWith('deepseek-coder')) {
-                modelFormat = 'deepseek';
-            } else if (modelName.startsWith('stable-code')) {
-                modelFormat = 'stable-code';
-            }
-        }
 
         let delay = config.get('delay') as number;
 
@@ -45,7 +33,6 @@ class Config {
             maxTokens,
             temperature,
             modelName,
-            modelFormat,
             delay
         };
     }

src/modules/lineGenerator.ts

Lines changed: 0 additions & 54 deletions
This file was deleted.

src/modules/ollamaCheckModel.ts

Lines changed: 0 additions & 21 deletions
This file was deleted.

src/modules/ollamaDownloadModel.ts

Lines changed: 0 additions & 9 deletions
This file was deleted.
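The deletions of ollamaCheckModel.ts and ollamaDownloadModel.ts remove the automatic model check and download. Their contents are not shown in this diff; purely for context, a rough sketch of what such helpers typically look like against Ollama's HTTP API (/api/tags lists local models, /api/pull fetches one) — the function names and shapes here are assumptions, not the deleted code:

// Rough sketch, not the deleted modules' actual contents.
async function ollamaHasModel(endpoint: string, model: string): Promise<boolean> {
    const res = await fetch(endpoint + '/api/tags'); // lists locally available models
    if (!res.ok) {
        throw Error('Unable to connect to backend');
    }
    const body = await res.json() as { models: { name: string }[] };
    return body.models.some((m) => m.name === model);
}

async function ollamaPullModel(endpoint: string, model: string): Promise<void> {
    // stream: false waits for the pull to complete instead of streaming progress.
    const res = await fetch(endpoint + '/api/pull', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name: model, stream: false }),
    });
    if (!res.ok) {
        throw Error('Unable to pull model');
    }
}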

src/modules/ollamaRequest.ts

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+export async function makeOllamaRequest(url: string, data: any, bearerToken: string): Promise<string> {
+    // Request
+    const controller = new AbortController();
+    let res = await fetch(url, {
+        method: 'POST',
+        body: JSON.stringify(data),
+        headers: bearerToken ? {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${bearerToken}`,
+        } : {
+            'Content-Type': 'application/json',
+        },
+        signal: controller.signal,
+    });
+    if (!res.ok || !res.body) {
+        throw Error('Unable to connect to backend');
+    }
+
+    // Reading stream
+    let stream = res.body.getReader();
+    const decoder = new TextDecoder();
+    try {
+        const { value } = await stream.read();
+
+        // Append chunk
+        let chunk = decoder.decode(value);
+        return chunk;
+    } finally {
+        stream.releaseLock();
+        if (!stream.closed) { // Stop generation
+            await stream.cancel();
+        }
+        controller.abort();
+    }
+}
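A hypothetical call to the new helper (the endpoint URL, model tag, and import path are assumptions; the request body mirrors what autocomplete.ts below sends). Note that a single read() yields only the first chunk of the response body; with stream: false and typical completion sizes that is usually the entire JSON object, which is presumably why the caller below still wraps JSON.parse in a try/catch:

import { makeOllamaRequest } from './ollamaRequest';

// Hypothetical usage — values are illustrative:
async function demo() {
    const raw = await makeOllamaRequest('http://127.0.0.1:11434/api/generate', {
        model: 'stable-code:3b-code-q4_0',
        prompt: 'function add(a: number, b: number) {\n    return ',
        suffix: ';\n}',
        raw: true,
        stream: false,
        options: { num_predict: 64, temperature: 0.2 },
    }, '');
    const { response } = JSON.parse(raw) as { model: string, response: string };
    console.log(response);
}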

src/modules/ollamaTokenGenerator.ts

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/prompts/autocomplete.ts

Lines changed: 22 additions & 76 deletions
@@ -1,13 +1,14 @@
-import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator';
-import { countSymbol } from '../modules/text';
-import { info } from '../modules/log';
-import { ModelFormat, adaptPrompt } from './processors/models';
+import { makeOllamaRequest } from "../modules/ollamaRequest";
+
+type OllamaToken = {
+    model: string,
+    response: string,
+};
 
 export async function autocomplete(args: {
     endpoint: string,
     bearerToken: string,
     model: string,
-    format: ModelFormat,
     prefix: string,
     suffix: string,
     maxLines: number,
@@ -16,88 +17,33 @@ export async function autocomplete(args: {
     canceled?: () => boolean,
 }): Promise<string> {
 
-    let prompt = adaptPrompt({ prefix: args.prefix, suffix: args.suffix, format: args.format });
-
     // Calculate arguments
     let data = {
         model: args.model,
-        prompt: prompt.prompt,
+        prompt: args.prefix,
+        suffix: args.suffix,
         raw: true,
+        stream: false,
         options: {
-            stop: prompt.stop,
             num_predict: args.maxTokens,
             temperature: args.temperature
         }
     };
 
-    // Receiving tokens
-    let res = '';
-    let totalLines = 1;
-    let blockStack: ('[' | '(' | '{')[] = [];
-    outer: for await (let tokens of ollamaTokenGenerator(args.endpoint + '/api/generate', data, args.bearerToken)) {
+    const res = await makeOllamaRequest(args.endpoint + '/api/generate', data, args.bearerToken);
+    try {
+        const tokens = JSON.parse(res) as OllamaToken;
         if (args.canceled && args.canceled()) {
-            break;
-        }
-
-        // Block stack
-        for (let c of tokens.response) {
-
-            // Open block
-            if (c === '[') {
-                blockStack.push('[');
-            } else if (c === '(') {
-                blockStack.push('(');
-            }
-            if (c === '{') {
-                blockStack.push('{');
-            }
-
-            // Close block
-            if (c === ']') {
-                if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '[') {
-                    blockStack.pop();
-                } else {
-                    info('Block stack error, breaking.');
-                    break outer;
-                }
-            }
-            if (c === ')') {
-                if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '(') {
-                    blockStack.pop();
-                } else {
-                    info('Block stack error, breaking.');
-                    break outer;
-                }
-            }
-            if (c === '}') {
-                if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '{') {
-                    blockStack.pop();
-                } else {
-                    info('Block stack error, breaking.');
-                    break outer;
-                }
-            }
-
-            // Append charater
-            res += c;
-        }
-
-        // Update total lines
-        totalLines += countSymbol(tokens.response, '\n');
-        // Break if too many lines and on top level
-        if (totalLines > args.maxLines && blockStack.length === 0) {
-            info('Too many lines, breaking.');
-            break;
+            return "";
         }
+        const response = tokens.response;
+
+        // take only args.maxLines lines from the response
+        let lines = response.split('\n');
+        lines = lines.slice(0, args.maxLines);
+        return lines.join('\n');
+    } catch (e) {
+        console.warn('Receive wrong line: ' + res);
+        return "";
     }
-
-    // Remove <EOT>
-    if (res.endsWith('<EOT>')) {
-        res = res.slice(0, res.length - 5);
-    }
-
-    // Trim ends of all lines since sometimes the AI completion will add extra spaces
-    res = res.split('\n').map((v) => v.trimEnd()).join('\n');
-
-    return res;
 }
