added emote reaction support and better emote handling in reactions, implemented LLM refusal detection and retry logic, improved some inline documentation
@@ -28,3 +28,14 @@ model User {
   userFqn         String    @unique
   lastRespondedTo DateTime?
 }
+
+model Reaction {
+  id        Int      @id @default(autoincrement())
+  statusId  String   // The Pleroma status ID we reacted to
+  emojiName String   // The emoji we used to react
+  reactedAt DateTime @default(now())
+  createdAt DateTime @default(now())
+
+  @@unique([statusId]) // Prevent multiple reactions to same status
+  @@map("reactions")
+}
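The @@unique([statusId]) constraint means the database itself guards against double reactions, independently of any up-front existence check. A minimal sketch of leaning on that constraint directly (not part of this commit; assumes the Prisma client generated from the schema above and Prisma's documented P2002 unique-violation error code):

import { PrismaClient, Prisma } from "@prisma/client";

const prisma = new PrismaClient();

// Try to record a reaction; the unique constraint on statusId makes this safe
// to call twice for the same status without checking first.
const tryRecordReaction = async (
  statusId: string,
  emojiName: string
): Promise<boolean> => {
  try {
    await prisma.reaction.create({ data: { statusId, emojiName } });
    return true; // first reaction to this status
  } catch (error) {
    if (
      error instanceof Prisma.PrismaClientKnownRequestError &&
      error.code === "P2002"
    ) {
      return false; // unique constraint hit: already reacted
    }
    throw error; // anything else is a real failure
  }
};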
src/api.ts (177 changes)
@@ -1,5 +1,7 @@
 import { envConfig, prisma } from "./main.js";
 import { PleromaEmoji, Notification, ContextResponse } from "../types.js";
+import { selectRandomEmojis } from "./util.js";
+
 
 const getNotifications = async () => {
   const { bearerToken, pleromaInstanceUrl } = envConfig;
@@ -98,9 +100,184 @@ const deleteNotification = async (notification: Notification) => {
   }
 };
 
+
+/**
+ * React to a status with a random emoji
+ */
+const reactToStatus = async (statusId: string, emojiName: string): Promise<boolean> => {
+  const { bearerToken, pleromaInstanceUrl } = envConfig;
+
+  try {
+    const response = await fetch(
+      `${pleromaInstanceUrl}/api/v1/statuses/${statusId}/react/${emojiName}`,
+      {
+        method: "PUT",
+        headers: {
+          Authorization: `Bearer ${bearerToken}`,
+          "Content-Type": "application/json",
+        },
+      }
+    );
+
+    if (!response.ok) {
+      console.error(`Failed to react to status ${statusId}: ${response.status} - ${response.statusText}`);
+      return false;
+    }
+
+    return true;
+  } catch (error: any) {
+    console.error(`Error reacting to status ${statusId}: ${error.message}`);
+    return false;
+  }
+};
+
+/**
+ * Check if we've already reacted to a status
+ */
+const hasAlreadyReacted = async (statusId: string): Promise<boolean> => {
+  try {
+    const reaction = await prisma.reaction.findFirst({
+      where: { statusId: statusId },
+    });
+    return !!reaction;
+  } catch (error: any) {
+    console.error(`Error checking reaction status: ${error.message}`);
+    return true; // Assume we've reacted to avoid spamming on error
+  }
+};
+
+/**
+ * Record that we've reacted to a status
+ */
+const recordReaction = async (statusId: string, emojiName: string): Promise<void> => {
+  try {
+    await prisma.reaction.create({
+      data: {
+        statusId: statusId,
+        emojiName: emojiName,
+        reactedAt: new Date(),
+      },
+    });
+  } catch (error: any) {
+    console.error(`Error recording reaction: ${error.message}`);
+  }
+};
+
+/**
+ * Decide whether to react to a post (not every post gets a reaction)
+ */
+const shouldReactToPost = (): boolean => {
+  // React to roughly 30% of posts
+  return Math.random() < 0.3;
+};
+
+/**
+ * Get appropriate reaction emojis based on content sentiment/keywords
+ */
+const getContextualEmoji = (content: string, availableEmojis: string[]): string => {
+  const contentLower = content.toLowerCase();
+
+  // Define emoji categories with keywords
+  const emojiCategories = {
+    positive: ['happy', 'smile', 'joy', 'love', 'heart', 'thumbsup', 'fire', 'based'],
+    negative: ['sad', 'cry', 'angry', 'rage', 'disappointed', 'cringe'],
+    thinking: ['think', 'hmm', 'brain', 'smart', 'curious'],
+    laughing: ['laugh', 'lol', 'kek', 'funny', 'haha', 'rofl'],
+    agreement: ['yes', 'agree', 'nod', 'correct', 'true', 'based'],
+    surprise: ['wow', 'amazing', 'surprised', 'shock', 'omg'],
+  };
+
+  // Keywords that might indicate sentiment
+  const sentimentKeywords = {
+    positive: ['good', 'great', 'awesome', 'nice', 'love', 'happy', 'excellent', 'perfect'],
+    negative: ['bad', 'terrible', 'hate', 'awful', 'horrible', 'worst', 'sucks'],
+    funny: ['lol', 'haha', 'funny', 'hilarious', 'joke', 'meme'],
+    question: ['?', 'what', 'how', 'why', 'when', 'where'],
+    agreement: ['yes', 'exactly', 'true', 'right', 'correct', 'agree'],
+    thinking: ['think', 'consider', 'maybe', 'perhaps', 'hmm', 'interesting'],
+  };
+
+  // Check content sentiment and find matching emojis
+  for (const [sentiment, keywords] of Object.entries(sentimentKeywords)) {
+    if (keywords.some(keyword => contentLower.includes(keyword))) {
+      const categoryEmojis = emojiCategories[sentiment as keyof typeof emojiCategories];
+      if (categoryEmojis) {
+        const matchingEmojis = availableEmojis.filter(emoji =>
+          categoryEmojis.some(cat => emoji.toLowerCase().includes(cat))
+        );
+        if (matchingEmojis.length > 0) {
+          return matchingEmojis[Math.floor(Math.random() * matchingEmojis.length)];
+        }
+      }
+    }
+  }
+
+  // Fallback to random emoji from a curated list of common reactions
+  const commonReactions = availableEmojis.filter(emoji =>
+    ['heart', 'thumbsup', 'fire', 'kek', 'based', 'think', 'smile', 'laugh']
+      .some(common => emoji.toLowerCase().includes(common))
+  );
+
+  if (commonReactions.length > 0) {
+    return commonReactions[Math.floor(Math.random() * commonReactions.length)];
+  }
+
+  // Final fallback to any random emoji
+  return availableEmojis[Math.floor(Math.random() * availableEmojis.length)];
+};
+
+/**
+ * Main function to handle post reactions
+ */
+const handlePostReaction = async (notification: Notification): Promise<void> => {
+  try {
+    const statusId = notification.status.id;
+
+    // Check if we should react to this post
+    if (!shouldReactToPost()) {
+      return;
+    }
+
+    // Check if we've already reacted
+    if (await hasAlreadyReacted(statusId)) {
+      return;
+    }
+
+    // Get available emojis
+    const emojiList = await getInstanceEmojis();
+    if (!emojiList || emojiList.length === 0) {
+      return;
+    }
+
+    // Select a smaller random pool for reactions (5-10 emojis)
+    const reactionPool = selectRandomEmojis(emojiList, 8);
+
+    // Get contextual emoji based on post content
+    const selectedEmoji = getContextualEmoji(
+      notification.status.pleroma.content["text/plain"],
+      reactionPool
+    );
+
+    // React to the post
+    const success = await reactToStatus(statusId, selectedEmoji);
+
+    if (success) {
+      await recordReaction(statusId, selectedEmoji);
+      console.log(`Reacted to status ${statusId} with :${selectedEmoji}:`);
+    }
+
+  } catch (error: any) {
+    console.error(`Error handling post reaction: ${error.message}`);
+  }
+};
+
+
 export {
   deleteNotification,
   getInstanceEmojis,
   getNotifications,
   getStatusContext,
+  reactToStatus,
+  handlePostReaction,
+  hasAlreadyReacted,
 };
 
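One subtlety in getContextualEmoji above: a sentiment only maps to emojis when its key also exists in emojiCategories, and the "funny" and "question" keys have no counterpart there, so those matches always fall through to the common-reactions fallback. A standalone sketch of that lookup behavior (reduced copies of the two tables; emoji names are illustrative only):

// Reduced copies of the lookup tables from getContextualEmoji.
const emojiCategories: Record<string, string[]> = {
  positive: ["happy", "smile", "heart"],
  laughing: ["laugh", "lol", "kek"],
};

const sentimentKeywords: Record<string, string[]> = {
  positive: ["good", "great"],
  funny: ["lol", "haha"], // no emojiCategories["funny"] entry exists
};

const content = "haha good one";
for (const [sentiment, keywords] of Object.entries(sentimentKeywords)) {
  if (keywords.some((k) => content.includes(k))) {
    const categoryEmojis = emojiCategories[sentiment];
    // "positive" resolves to a category; "funny" is undefined and is skipped.
    console.log(sentiment, "->", categoryEmojis ?? "no category, falls through");
  }
}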
src/main.ts (65 changes)
@@ -13,6 +13,7 @@ import {
   deleteNotification,
   getNotifications,
   getStatusContext,
+  handlePostReaction,
 } from "./api.js";
 import { storeUserData, storePromptData } from "./prisma.js";
 import {
@@ -20,7 +21,9 @@ import {
   alreadyRespondedTo,
   recordPendingResponse,
   // trimInputData,
-  selectRandomEmoji,
+  // selectRandomEmoji,
+  selectRandomEmojis,
+  isLLMRefusal,
   shouldContinue,
 } from "./util.js";
 
@@ -59,7 +62,8 @@ const ollamaConfig: OllamaConfigOptions = {
 // https://replicate.com/blog/how-to-prompt-llama
 
 const generateOllamaRequest = async (
-  notification: Notification
+  notification: Notification,
+  retryAttempt: number = 0
 ): Promise<OllamaChatResponse | undefined> => {
   const {
     whitelistOnly,
@@ -68,6 +72,7 @@ const generateOllamaRequest = async (
     ollamaUrl,
     replyWithContext,
   } = envConfig;
 
   try {
     if (shouldContinue(notification)) {
       if (whitelistOnly && !isFromWhitelistedDomain(notification)) {
@@ -79,6 +84,7 @@ const generateOllamaRequest = async (
       }
       await recordPendingResponse(notification);
       await storeUserData(notification);
 
       let conversationHistory: PostAncestorsForModel[] = [];
       if (replyWithContext) {
         const contextPosts = await getStatusContext(notification.status.id);
@@ -93,15 +99,20 @@ const generateOllamaRequest = async (
             plaintext_content: ancestor.pleroma.content["text/plain"],
           };
         });
         // console.log(conversationHistory);
       }
 
       // Simplified user message (remove [/INST] as it's not needed for Llama 3)
       const userMessage = `${notification.status.account.fqn} says: ${notification.status.pleroma.content["text/plain"]}`;
 
-      let systemContent = ollamaSystemPrompt;
+      // Get random emojis for this request
+      const emojiList = await getInstanceEmojis();
+      let availableEmojis = "";
+      if (emojiList && emojiList.length > 0) {
+        const randomEmojis = selectRandomEmojis(emojiList, 20);
+        availableEmojis = `\n\nAvailable custom emojis you can use in your response (or use none!) (format as :emoji_name:): ${randomEmojis.join(", ")}`;
+      }
+
+      let systemContent = ollamaSystemPrompt + availableEmojis;
       if (replyWithContext) {
         // Simplified context instructions (avoid heavy JSON; summarize for clarity)
         systemContent = `${ollamaSystemPrompt}\n\nPrevious conversation context:\n${conversationHistory
           .map(
             (post) =>
@@ -111,10 +122,15 @@ const generateOllamaRequest = async (
           )
           .join(
             "\n"
-          )}\nReply as if you are a party to the conversation. If '@nice-ai' is mentioned, respond directly. Prefix usernames with '@' when addressing them.`;
+          )}\nReply as if you are a party to the conversation. If '@nice-ai' is mentioned, respond directly. Prefix usernames with '@' when addressing them.${availableEmojis}`;
       }
 
       // Switch to chat request format (messages array auto-handles Llama 3 template)
+      // Use different seeds for retry attempts
+      const currentConfig = {
+        ...ollamaConfig,
+        seed: retryAttempt > 0 ? Math.floor(Math.random() * 1000000) : ollamaConfig.seed,
+      };
+
       const ollamaRequestBody: OllamaChatRequest = {
         model: ollamaModel,
         messages: [
@@ -122,16 +138,21 @@ const generateOllamaRequest = async (
           { role: "user", content: userMessage },
         ],
         stream: false,
-        options: ollamaConfig,
+        options: currentConfig,
       };
 
       // Change endpoint to /api/chat
       const response = await fetch(`${ollamaUrl}/api/chat`, {
         method: "POST",
         body: JSON.stringify(ollamaRequestBody),
       });
       const ollamaResponse: OllamaChatResponse = await response.json();
 
+      // Check for refusal and retry up to 2 times
+      if (isLLMRefusal(ollamaResponse.message.content) && retryAttempt < 2) {
+        console.log(`LLM refused to answer (attempt ${retryAttempt + 1}), retrying with different seed...`);
+        return generateOllamaRequest(notification, retryAttempt + 1);
+      }
+
       await storePromptData(notification, ollamaResponse);
       return ollamaResponse;
     }
@@ -145,16 +166,11 @@ const postReplyToStatus = async (
   ollamaResponseBody: OllamaChatResponse
 ) => {
   const { pleromaInstanceUrl, bearerToken } = envConfig;
-  const emojiList = await getInstanceEmojis();
-  let randomEmoji;
-  if (emojiList) {
-    randomEmoji = selectRandomEmoji(emojiList);
-  }
   try {
     let mentions: string[];
     const statusBody: NewStatusBody = {
       content_type: "text/markdown",
-      status: `${ollamaResponseBody.message.content} :${randomEmoji}:`,
+      status: ollamaResponseBody.message.content,
       in_reply_to_id: notification.status.id,
     };
     if (
@@ -247,17 +263,28 @@ const beginFetchCycle = async () => {
       await Promise.all(
         notifications.map(async (notification) => {
           try {
+            // Handle reactions first (before generating response)
+            // This way we can react even if response generation fails
+            await handlePostReaction(notification);
+
+            // Then handle the response generation as before
            const ollamaResponse = await generateOllamaRequest(notification);
            if (ollamaResponse) {
-              postReplyToStatus(notification, ollamaResponse);
+              await postReplyToStatus(notification, ollamaResponse);
            }
          } catch (error: any) {
-            throw new Error(error.message);
+            console.error(`Error processing notification ${notification.id}: ${error.message}`);
+            // Still try to delete the notification to avoid getting stuck
+            try {
+              await deleteNotification(notification);
+            } catch (deleteError: any) {
+              console.error(`Failed to delete notification: ${deleteError.message}`);
+            }
          }
        })
      );
    }
-  }, envConfig.fetchInterval); // lower intervals may cause the bot to respond multiple times to the same message, but we try to mitigate this with the deleteNotification function
+  }, envConfig.fetchInterval);
 };
 
 const beginStatusPostInterval = async () => {
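The retry logic above is bounded by the retryAttempt parameter: a refusal re-enters generateOllamaRequest with retryAttempt + 1 and a freshly randomized seed, and after two retries the last response is returned as-is. The same pattern as an iterative sketch (generateOnce is a hypothetical stand-in for the /api/chat call; isLLMRefusal is the helper from src/util.ts):

import { isLLMRefusal } from "./util.js";

// Bounded retry-on-refusal: up to maxRetries re-rolls with a random seed,
// then give up and return whatever the model last produced.
const generateWithRetry = async (
  prompt: string,
  generateOnce: (prompt: string, seed?: number) => Promise<string>,
  maxRetries: number = 2
): Promise<string> => {
  let reply = await generateOnce(prompt); // first attempt: configured seed
  for (let attempt = 1; attempt <= maxRetries && isLLMRefusal(reply); attempt++) {
    console.log(`LLM refused (attempt ${attempt}), retrying with a new seed...`);
    reply = await generateOnce(prompt, Math.floor(Math.random() * 1000000));
  }
  return reply;
};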
src/util.ts (38 changes)
@@ -98,9 +98,47 @@ const selectRandomEmoji = (emojiList: string[]) => {
   return emojiList[Math.floor(Math.random() * emojiList.length)];
 };
 
+const selectRandomEmojis = (emojiList: string[], count: number = 20): string[] => {
+  if (emojiList.length <= count) return emojiList;
+
+  const shuffled = [...emojiList].sort(() => 0.5 - Math.random());
+  return shuffled.slice(0, count);
+};
+
+const isLLMRefusal = (response: string): boolean => {
+  const refusalPatterns = [
+    /i can't|i cannot|unable to|i'm not able to/i,
+    /i don't feel comfortable/i,
+    /i'm not comfortable/i,
+    /i shouldn't|i won't/i,
+    /that's not something i can/i,
+    /i'm not programmed to/i,
+    /i'm an ai (assistant|language model)/i,
+    /as an ai/i,
+    /i apologize, but/i,
+    /i must decline/i,
+    /that would be inappropriate/i,
+    /i'm not supposed to/i,
+    /i'd rather not/i,
+    /i prefer not to/i,
+    /against my guidelines/i,
+    /violates my programming/i,
+  ];
+
+  const normalizedResponse = response.toLowerCase().trim();
+
+  // Check if response is too short (likely a refusal)
+  if (normalizedResponse.length < 20) return true;
+
+  // Check for refusal patterns
+  return refusalPatterns.some(pattern => pattern.test(normalizedResponse));
+};
+
 export {
   alreadyRespondedTo,
   selectRandomEmoji,
+  selectRandomEmojis,
+  isLLMRefusal,
   trimInputData,
   recordPendingResponse,
   isFromWhitelistedDomain,
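A quick sanity check of what isLLMRefusal treats as a refusal (illustrative strings only): anything under 20 characters counts, as does any pattern hit, which also means benign uses of phrases like "as an AI" will trigger a re-roll:

import { isLLMRefusal } from "./util.js";

console.log(isLLMRefusal("I cannot help with that request."));         // true: pattern match
console.log(isLLMRefusal("ok"));                                       // true: under 20 chars
console.log(isLLMRefusal("As an AI, I find spreadsheets soothing."));  // true: "as an ai" matches
console.log(isLLMRefusal("Sure! Here's a genuine, detailed answer.")); // false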
types.d.ts (vendored, 174 changes)
@@ -158,48 +158,182 @@ interface PleromaEmojiMetadata {
   tags: string[];
 }
 
+interface ReactionRequest {
+  name: string; // emoji name without colons
+}
+
+interface ReactionResponse {
+  name: string;
+  count: number;
+  me: boolean;
+  url?: string;
+  static_url?: string;
+}
+
 /**
  * Experimental settings, I wouldn't recommend messing with these if you don't know how they work (I don't either)
  */
 export interface OllamaConfigOptions {
   /**
-   * Number of tokens guaranteed to be kept in memory during response generation. Higher values leave less
-   * possible room for num_ctx
+   * Number of tokens guaranteed to be kept in memory during response generation.
+   * Higher values leave less room for num_ctx. Used to preserve important context.
+   * Default: 0, Range: 0-512
    */
   num_keep?: number;
-  seed?: number;
 
   /**
-   * Sets maximum of tokens in the response
+   * Random seed for reproducible outputs. Same seed + same inputs = same output.
+   * Default: -1 (random), Range: any integer
+   */
+  seed?: number;
+
+  /**
+   * Maximum number of tokens to generate in the response. Controls response length.
+   * Default: 128, Range: 1-4096+ (model dependent)
    */
   num_predict?: number;
-  top_k?: number;
-  top_p?: number;
-  min_p?: number;
-  typical_p?: number;
-  repeat_last_n?: number;
 
   /**
-   * How close of a response should the response be to the original prompt - lower = more focused response
+   * Limits token selection to top K most probable tokens. Reduces randomness.
+   * Default: 40, Range: 1-100 (higher = more diverse)
    */
+  top_k?: number;
+
+  /**
+   * Nucleus sampling - cumulative probability cutoff for token selection.
+   * Default: 0.9, Range: 0.0-1.0 (lower = more focused)
+   */
+  top_p?: number;
+
+  /**
+   * Alternative to top_p - minimum probability threshold for tokens.
+   * Default: 0.0, Range: 0.0-1.0 (higher = more selective)
+   */
+  min_p?: number;
+
+  /**
+   * Typical sampling - targets tokens with "typical" probability mass.
+   * Default: 1.0 (disabled), Range: 0.0-1.0 (lower = less random)
+   */
+  typical_p?: number;
+
+  /**
+   * Number of previous tokens to consider for repetition penalty.
+   * Default: 64, Range: 0-512
+   */
+  repeat_last_n?: number;
+
+  /**
+   * Randomness/creativity control. Lower = more deterministic, higher = more creative.
+   * Default: 0.8, Range: 0.0-2.0 (sweet spot: 0.1-1.2)
+   */
   temperature?: number;
-  repeat_penalty?: number;
-  presence_penalty?: number;
-  frequency_penalty?: number;
-  mirostat?: number;
-  mirostat_tau?: number;
-  mirostat_eta?: number;
-  penalize_newline?: boolean;
-  stop?: string[];
-  numa?: boolean;
 
   /**
-   * Number of tokens for the prompt to keep in memory for the response, minus the value of num_keep
+   * Penalty for repeating tokens. Higher values reduce repetition.
+   * Default: 1.1, Range: 0.0-2.0 (1.0 = no penalty)
    */
+  repeat_penalty?: number;
+
+  /**
+   * Penalty for using tokens that have already appeared (OpenAI-style).
+   * Default: 0.0, Range: -2.0 to 2.0
+   */
+  presence_penalty?: number;
+
+  /**
+   * Penalty proportional to token frequency in text (OpenAI-style).
+   * Default: 0.0, Range: -2.0 to 2.0
+   */
+  frequency_penalty?: number;
+
+  /**
+   * Enables Mirostat sampling algorithm (0=disabled, 1=v1, 2=v2).
+   * Default: 0, Range: 0, 1, or 2
+   */
+  mirostat?: number;
+
+  /**
+   * Target entropy for Mirostat. Controls coherence vs creativity balance.
+   * Default: 5.0, Range: 0.0-10.0
+   */
+  mirostat_tau?: number;
+
+  /**
+   * Learning rate for Mirostat. How quickly it adapts.
+   * Default: 0.1, Range: 0.001-1.0
+   */
+  mirostat_eta?: number;
+
+  /**
+   * Apply penalty to newline tokens to control formatting.
+   * Default: true
+   */
+  penalize_newline?: boolean;
+
+  /**
+   * Array of strings that will stop generation when encountered.
+   * Default: [], Example: ["\n", "User:", "###"]
+   */
+  stop?: string[];
+
+  /**
+   * Enable NUMA (Non-Uniform Memory Access) optimization.
+   * Default: false (Linux systems may benefit from true)
+   */
+  numa?: boolean;
+
+  /**
+   * Context window size - total tokens for prompt + response.
+   * Default: 2048, Range: 512-32768+ (model dependent, affects memory usage)
+   */
   num_ctx?: number;
+
+  /**
+   * Batch size for prompt processing. Higher = faster but more memory.
+   * Default: 512, Range: 1-2048
+   */
+  num_batch?: number;
+
+  /**
+   * Number of GPU layers to offload. -1 = auto, 0 = CPU only.
+   * Default: -1, Range: -1 to model layer count
+   */
+  num_gpu?: number;
+
+  /**
+   * Primary GPU device ID for multi-GPU setups.
+   * Default: 0, Range: 0 to (GPU count - 1)
+   */
+  main_gpu?: number;
+
+  /**
+   * Optimize for low VRAM usage at cost of speed.
+   * Default: false
+   */
+  low_vram?: boolean;
+
+  /**
+   * Only load vocabulary, skip weights. For tokenization only.
+   * Default: false
+   */
+  vocab_only?: boolean;
+
+  /**
+   * Use memory mapping for model files (faster loading).
+   * Default: true
+   */
+  use_mmap?: boolean;
+
+  /**
+   * Lock model in memory to prevent swapping.
+   * Default: false (enable for consistent performance)
+   */
+  use_mlock?: boolean;
+
+  /**
+   * Number of CPU threads for inference.
+   * Default: auto-detected, Range: 1 to CPU core count
+   */
+  num_thread?: number;
 }
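Taken together with the defaults documented above, a conservative configuration for a chat bot like this one might look as follows (illustrative values, not taken from the commit; the import path follows the convention used in src/api.ts):

import type { OllamaConfigOptions } from "../types.js";

const exampleConfig: OllamaConfigOptions = {
  num_ctx: 4096,       // room for the system prompt plus conversation context
  num_predict: 256,    // keep replies short enough for a fediverse post
  temperature: 0.9,    // slightly creative while staying coherent
  top_p: 0.9,          // nucleus sampling at the documented default
  repeat_penalty: 1.1, // mild discouragement of repetition
  seed: -1,            // random; the retry logic re-rolls this on refusals
};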