added emote reaction support, better emote support in reactions, implemented llm refusal and retry logic, improved some inline documentation
@@ -28,3 +28,14 @@ model User {
   userFqn         String    @unique
   lastRespondedTo DateTime?
 }
+
+model Reaction {
+  id        Int      @id @default(autoincrement())
+  statusId  String   // The Pleroma status ID we reacted to
+  emojiName String   // The emoji we used to react
+  reactedAt DateTime @default(now())
+  createdAt DateTime @default(now())
+
+  @@unique([statusId]) // Prevent multiple reactions to same status
+  @@map("reactions")
+}
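For reference, a minimal sketch of how the new Reaction model is exercised through the generated Prisma client. The client setup and the status ID are assumptions for illustration, not part of this commit:

import { Prisma, PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

async function demoReaction(): Promise<void> {
  try {
    // First write succeeds; a second create for the same statusId violates
    // @@unique([statusId]) and throws Prisma error P2002.
    await prisma.reaction.create({
      data: { statusId: "AHypotheticalId", emojiName: "kek" },
    });
  } catch (e) {
    if (e instanceof Prisma.PrismaClientKnownRequestError && e.code === "P2002") {
      console.log("already reacted to this status");
    } else {
      throw e;
    }
  }
}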
src/api.ts (177 lines changed)
@@ -1,5 +1,7 @@
 import { envConfig, prisma } from "./main.js";
 import { PleromaEmoji, Notification, ContextResponse } from "../types.js";
+import { selectRandomEmojis } from "./util.js";
+
 
 const getNotifications = async () => {
   const { bearerToken, pleromaInstanceUrl } = envConfig;
@@ -98,9 +100,184 @@ const deleteNotification = async (notification: Notification) => {
   }
 };
 
+
+/**
+ * React to a status with a random emoji
+ */
+const reactToStatus = async (statusId: string, emojiName: string): Promise<boolean> => {
+  const { bearerToken, pleromaInstanceUrl } = envConfig;
+
+  try {
+    const response = await fetch(
+      `${pleromaInstanceUrl}/api/v1/statuses/${statusId}/react/${emojiName}`,
+      {
+        method: "PUT",
+        headers: {
+          Authorization: `Bearer ${bearerToken}`,
+          "Content-Type": "application/json",
+        },
+      }
+    );
+
+    if (!response.ok) {
+      console.error(`Failed to react to status ${statusId}: ${response.status} - ${response.statusText}`);
+      return false;
+    }
+
+    return true;
+  } catch (error: any) {
+    console.error(`Error reacting to status ${statusId}: ${error.message}`);
+    return false;
+  }
+};
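A usage sketch for the new helper (the status ID is made up). One caveat worth flagging: emojiName is interpolated into the URL path verbatim, so a name that is not URL-safe would need escaping; wrapping it in encodeURIComponent, as below, is a suggested precaution, not something this commit does:

const ok = await reactToStatus("AHypotheticalStatusId", encodeURIComponent("kek"));
console.log(ok ? "reaction delivered" : "reaction failed");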
+
+/**
+ * Check if we've already reacted to a status
+ */
+const hasAlreadyReacted = async (statusId: string): Promise<boolean> => {
+  try {
+    const reaction = await prisma.reaction.findFirst({
+      where: { statusId: statusId },
+    });
+    return !!reaction;
+  } catch (error: any) {
+    console.error(`Error checking reaction status: ${error.message}`);
+    return true; // Assume we've reacted to avoid spamming on error
+  }
+};
+
+/**
+ * Record that we've reacted to a status
+ */
+const recordReaction = async (statusId: string, emojiName: string): Promise<void> => {
+  try {
+    await prisma.reaction.create({
+      data: {
+        statusId: statusId,
+        emojiName: emojiName,
+        reactedAt: new Date(),
+      },
+    });
+  } catch (error: any) {
+    console.error(`Error recording reaction: ${error.message}`);
+  }
+};
+
+/**
+ * Decide whether to react to a post (not every post gets a reaction)
+ */
+const shouldReactToPost = (): boolean => {
+  // React to roughly 30% of posts
+  return Math.random() < 0.3;
+};
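If the 30% rate ever needs tuning without a code change, one possible variant (purely a sketch; REACTION_PROBABILITY is a hypothetical variable, not read anywhere in this repo) is to source it from the environment:

// Hypothetical env-driven variant; falls back to the hardcoded 0.3.
const reactionProbability = Number(process.env.REACTION_PROBABILITY ?? "0.3");

const shouldReactToPostConfigurable = (): boolean =>
  Math.random() < reactionProbability;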
+
+/**
+ * Get appropriate reaction emojis based on content sentiment/keywords
+ */
+const getContextualEmoji = (content: string, availableEmojis: string[]): string => {
+  const contentLower = content.toLowerCase();
+
+  // Define emoji categories with keywords
+  const emojiCategories = {
+    positive: ['happy', 'smile', 'joy', 'love', 'heart', 'thumbsup', 'fire', 'based'],
+    negative: ['sad', 'cry', 'angry', 'rage', 'disappointed', 'cringe'],
+    thinking: ['think', 'hmm', 'brain', 'smart', 'curious'],
+    laughing: ['laugh', 'lol', 'kek', 'funny', 'haha', 'rofl'],
+    agreement: ['yes', 'agree', 'nod', 'correct', 'true', 'based'],
+    surprise: ['wow', 'amazing', 'surprised', 'shock', 'omg'],
+  };
+
+  // Keywords that might indicate sentiment
+  const sentimentKeywords = {
+    positive: ['good', 'great', 'awesome', 'nice', 'love', 'happy', 'excellent', 'perfect'],
+    negative: ['bad', 'terrible', 'hate', 'awful', 'horrible', 'worst', 'sucks'],
+    funny: ['lol', 'haha', 'funny', 'hilarious', 'joke', 'meme'],
+    question: ['?', 'what', 'how', 'why', 'when', 'where'],
+    agreement: ['yes', 'exactly', 'true', 'right', 'correct', 'agree'],
+    thinking: ['think', 'consider', 'maybe', 'perhaps', 'hmm', 'interesting'],
+  };
+
+  // Check content sentiment and find matching emojis
+  for (const [sentiment, keywords] of Object.entries(sentimentKeywords)) {
+    if (keywords.some(keyword => contentLower.includes(keyword))) {
+      const categoryEmojis = emojiCategories[sentiment as keyof typeof emojiCategories];
+      if (categoryEmojis) {
+        const matchingEmojis = availableEmojis.filter(emoji =>
+          categoryEmojis.some(cat => emoji.toLowerCase().includes(cat))
+        );
+        if (matchingEmojis.length > 0) {
+          return matchingEmojis[Math.floor(Math.random() * matchingEmojis.length)];
+        }
+      }
+    }
+  }
+
+  // Fallback to random emoji from a curated list of common reactions
+  const commonReactions = availableEmojis.filter(emoji =>
+    ['heart', 'thumbsup', 'fire', 'kek', 'based', 'think', 'smile', 'laugh']
+      .some(common => emoji.toLowerCase().includes(common))
+  );
+
+  if (commonReactions.length > 0) {
+    return commonReactions[Math.floor(Math.random() * commonReactions.length)];
+  }
+
+  // Final fallback to any random emoji
+  return availableEmojis[Math.floor(Math.random() * availableEmojis.length)];
};
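A worked example of the matching order (emoji names invented for illustration). "lol" matches the funny sentiment, but emojiCategories has no funny key, so that call falls through to the curated fallback; "exactly" matches agreement, which does exist, so a pool emoji containing one of its keywords is chosen directly:

// Falls through to the curated fallback; "kek" is the only common-reaction match.
getContextualEmoji("lol that was hilarious", ["kek", "sadcat", "partyparrot"]); // "kek"

// "agreement" exists in emojiCategories; "nodding" contains "nod", so it wins.
getContextualEmoji("exactly right", ["nodding", "firedance"]); // "nodding"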
+
+/**
+ * Main function to handle post reactions
+ */
+const handlePostReaction = async (notification: Notification): Promise<void> => {
+  try {
+    const statusId = notification.status.id;
+
+    // Check if we should react to this post
+    if (!shouldReactToPost()) {
+      return;
+    }
+
+    // Check if we've already reacted
+    if (await hasAlreadyReacted(statusId)) {
+      return;
+    }
+
+    // Get available emojis
+    const emojiList = await getInstanceEmojis();
+    if (!emojiList || emojiList.length === 0) {
+      return;
+    }
+
+    // Select a smaller random pool for reactions (5-10 emojis)
+    const reactionPool = selectRandomEmojis(emojiList, 8);
+
+    // Get contextual emoji based on post content
+    const selectedEmoji = getContextualEmoji(
+      notification.status.pleroma.content["text/plain"],
+      reactionPool
+    );
+
+    // React to the post
+    const success = await reactToStatus(statusId, selectedEmoji);
+
+    if (success) {
+      await recordReaction(statusId, selectedEmoji);
+      console.log(`Reacted to status ${statusId} with :${selectedEmoji}:`);
+    }
+
+  } catch (error: any) {
+    console.error(`Error handling post reaction: ${error.message}`);
+  }
+};
+
+
 export {
   deleteNotification,
   getInstanceEmojis,
   getNotifications,
   getStatusContext,
+  reactToStatus,
+  handlePostReaction,
+  hasAlreadyReacted,
 };
src/main.ts (65 lines changed)
@@ -13,6 +13,7 @@ import {
   deleteNotification,
   getNotifications,
   getStatusContext,
+  handlePostReaction,
 } from "./api.js";
 import { storeUserData, storePromptData } from "./prisma.js";
 import {
@@ -20,7 +21,9 @@ import {
   alreadyRespondedTo,
   recordPendingResponse,
   // trimInputData,
-  selectRandomEmoji,
+  // selectRandomEmoji,
+  selectRandomEmojis,
+  isLLMRefusal,
   shouldContinue,
 } from "./util.js";
 
@@ -59,7 +62,8 @@ const ollamaConfig: OllamaConfigOptions = {
 // https://replicate.com/blog/how-to-prompt-llama
 
 const generateOllamaRequest = async (
-  notification: Notification
+  notification: Notification,
+  retryAttempt: number = 0
 ): Promise<OllamaChatResponse | undefined> => {
   const {
     whitelistOnly,
@@ -68,6 +72,7 @@ const generateOllamaRequest = async (
     ollamaUrl,
     replyWithContext,
   } = envConfig;
+
   try {
     if (shouldContinue(notification)) {
       if (whitelistOnly && !isFromWhitelistedDomain(notification)) {
@@ -79,6 +84,7 @@ const generateOllamaRequest = async (
       }
       await recordPendingResponse(notification);
       await storeUserData(notification);
+
       let conversationHistory: PostAncestorsForModel[] = [];
       if (replyWithContext) {
         const contextPosts = await getStatusContext(notification.status.id);
@@ -93,15 +99,20 @@ const generateOllamaRequest = async (
             plaintext_content: ancestor.pleroma.content["text/plain"],
           };
         });
-        // console.log(conversationHistory);
       }
 
-      // Simplified user message (remove [/INST] as it's not needed for Llama 3)
       const userMessage = `${notification.status.account.fqn} says: ${notification.status.pleroma.content["text/plain"]}`;
 
-      let systemContent = ollamaSystemPrompt;
+      // Get random emojis for this request
+      const emojiList = await getInstanceEmojis();
+      let availableEmojis = "";
+      if (emojiList && emojiList.length > 0) {
+        const randomEmojis = selectRandomEmojis(emojiList, 20);
+        availableEmojis = `\n\nAvailable custom emojis you can use in your response (or use none!) (format as :emoji_name:): ${randomEmojis.join(", ")}`;
+      }
+
+      let systemContent = ollamaSystemPrompt + availableEmojis;
       if (replyWithContext) {
-        // Simplified context instructions (avoid heavy JSON; summarize for clarity)
         systemContent = `${ollamaSystemPrompt}\n\nPrevious conversation context:\n${conversationHistory
           .map(
             (post) =>
@@ -111,10 +122,15 @@ const generateOllamaRequest = async (
           )
           .join(
             "\n"
-          )}\nReply as if you are a party to the conversation. If '@nice-ai' is mentioned, respond directly. Prefix usernames with '@' when addressing them.`;
+          )}\nReply as if you are a party to the conversation. If '@nice-ai' is mentioned, respond directly. Prefix usernames with '@' when addressing them.${availableEmojis}`;
       }
 
-      // Switch to chat request format (messages array auto-handles Llama 3 template)
+      // Use different seeds for retry attempts
+      const currentConfig = {
+        ...ollamaConfig,
+        seed: retryAttempt > 0 ? Math.floor(Math.random() * 1000000) : ollamaConfig.seed,
+      };
+
       const ollamaRequestBody: OllamaChatRequest = {
         model: ollamaModel,
         messages: [
@@ -122,16 +138,21 @@ const generateOllamaRequest = async (
           { role: "user", content: userMessage },
         ],
         stream: false,
-        options: ollamaConfig,
+        options: currentConfig,
       };
 
-      // Change endpoint to /api/chat
       const response = await fetch(`${ollamaUrl}/api/chat`, {
         method: "POST",
         body: JSON.stringify(ollamaRequestBody),
       });
       const ollamaResponse: OllamaChatResponse = await response.json();
+
+      // Check for refusal and retry up to 2 times
+      if (isLLMRefusal(ollamaResponse.message.content) && retryAttempt < 2) {
+        console.log(`LLM refused to answer (attempt ${retryAttempt + 1}), retrying with different seed...`);
+        return generateOllamaRequest(notification, retryAttempt + 1);
+      }
+
       await storePromptData(notification, ollamaResponse);
       return ollamaResponse;
     }
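So a refusal triggers at most two rerolls (retryAttempt 0, then 1, then 2), each retry with a fresh random seed. An iterative sketch of the same control flow, where generateOnce is a hypothetical stand-in for a single POST to `${ollamaUrl}/api/chat`:

import { isLLMRefusal } from "./util.js";

// Hypothetical stand-in for one chat round trip with the given seed.
declare function generateOnce(seed?: number): Promise<string>;

async function generateWithRetries(maxRetries = 2): Promise<string> {
  let reply = await generateOnce(); // first attempt uses the configured seed
  for (let attempt = 1; attempt <= maxRetries && isLLMRefusal(reply); attempt++) {
    reply = await generateOnce(Math.floor(Math.random() * 1000000)); // reroll seed
  }
  return reply; // may still be a refusal once the retry budget is spent
}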
@@ -145,16 +166,11 @@ const postReplyToStatus = async (
   ollamaResponseBody: OllamaChatResponse
 ) => {
   const { pleromaInstanceUrl, bearerToken } = envConfig;
-  const emojiList = await getInstanceEmojis();
-  let randomEmoji;
-  if (emojiList) {
-    randomEmoji = selectRandomEmoji(emojiList);
-  }
   try {
     let mentions: string[];
     const statusBody: NewStatusBody = {
       content_type: "text/markdown",
-      status: `${ollamaResponseBody.message.content} :${randomEmoji}:`,
+      status: ollamaResponseBody.message.content,
       in_reply_to_id: notification.status.id,
     };
     if (
@@ -247,17 +263,28 @@ const beginFetchCycle = async () => {
       await Promise.all(
         notifications.map(async (notification) => {
           try {
+            // Handle reactions first (before generating response)
+            // This way we can react even if response generation fails
+            await handlePostReaction(notification);
+
+            // Then handle the response generation as before
             const ollamaResponse = await generateOllamaRequest(notification);
             if (ollamaResponse) {
-              postReplyToStatus(notification, ollamaResponse);
+              await postReplyToStatus(notification, ollamaResponse);
             }
           } catch (error: any) {
-            throw new Error(error.message);
+            console.error(`Error processing notification ${notification.id}: ${error.message}`);
+            // Still try to delete the notification to avoid getting stuck
+            try {
+              await deleteNotification(notification);
+            } catch (deleteError: any) {
+              console.error(`Failed to delete notification: ${deleteError.message}`);
+            }
           }
         })
       );
     }
-  }, envConfig.fetchInterval); // lower intervals may cause the bot to respond multiple times to the same message, but we try to mitigate this with the deleteNotification function
+  }, envConfig.fetchInterval);
 };
 
 const beginStatusPostInterval = async () => {
src/util.ts (38 lines changed)
@@ -98,9 +98,47 @@ const selectRandomEmoji = (emojiList: string[]) => {
   return emojiList[Math.floor(Math.random() * emojiList.length)];
 };
 
+const selectRandomEmojis = (emojiList: string[], count: number = 20): string[] => {
+  if (emojiList.length <= count) return emojiList;
+
+  const shuffled = [...emojiList].sort(() => 0.5 - Math.random());
+  return shuffled.slice(0, count);
+};
+
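One caveat: sort(() => 0.5 - Math.random()) is a convenient shuffle but not a uniform one, since comparison sorts assume a consistent comparator and the resulting bias varies by engine. If uniformity ever matters here, a Fisher-Yates version would be a drop-in replacement (sketch only, not part of this commit):

const selectRandomEmojisUnbiased = (emojiList: string[], count: number = 20): string[] => {
  if (emojiList.length <= count) return emojiList;

  // Fisher-Yates: every permutation equally likely.
  const shuffled = [...emojiList];
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }
  return shuffled.slice(0, count);
};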
+const isLLMRefusal = (response: string): boolean => {
+  const refusalPatterns = [
+    /i can't|i cannot|unable to|i'm not able to/i,
+    /i don't feel comfortable/i,
+    /i'm not comfortable/i,
+    /i shouldn't|i won't/i,
+    /that's not something i can/i,
+    /i'm not programmed to/i,
+    /i'm an ai (assistant|language model)/i,
+    /as an ai/i,
+    /i apologize, but/i,
+    /i must decline/i,
+    /that would be inappropriate/i,
+    /i'm not supposed to/i,
+    /i'd rather not/i,
+    /i prefer not to/i,
+    /against my guidelines/i,
+    /violates my programming/i,
+  ];
+
+  const normalizedResponse = response.toLowerCase().trim();
+
+  // Check if response is too short (likely a refusal)
+  if (normalizedResponse.length < 20) return true;
+
+  // Check for refusal patterns
+  return refusalPatterns.some(pattern => pattern.test(normalizedResponse));
+};
+
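Behavior at a glance; note that the length heuristic means any reply under 20 characters counts as a refusal, including legitimate short answers:

console.log(isLLMRefusal("I'm sorry, but as an AI language model I can't do that.")); // true (pattern match)
console.log(isLLMRefusal("ok"));                                         // true (under 20 chars)
console.log(isLLMRefusal("The capital of France is Paris, of course.")); // false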
 export {
   alreadyRespondedTo,
   selectRandomEmoji,
+  selectRandomEmojis,
+  isLLMRefusal,
   trimInputData,
   recordPendingResponse,
   isFromWhitelistedDomain,
types.d.ts (vendored, 174 lines changed)
@@ -158,48 +158,182 @@ interface PleromaEmojiMetadata {
   tags: string[];
 }
+
+interface ReactionRequest {
+  name: string; // emoji name without colons
+}
+
+interface ReactionResponse {
+  name: string;
+  count: number;
+  me: boolean;
+  url?: string;
+  static_url?: string;
+}
 
 /**
  * Experimental settings, I wouldn't recommend messing with these if you don't know how they work (I don't either)
  */
 export interface OllamaConfigOptions {
   /**
-   * Number of tokens guaranteed to be kept in memory during response generation. Higher values leave less
-   * possible room for num_ctx
+   * Number of tokens guaranteed to be kept in memory during response generation.
+   * Higher values leave less room for num_ctx. Used to preserve important context.
+   * Default: 0, Range: 0-512
    */
   num_keep?: number;
-  seed?: number;
+
   /**
-   * Sets maximum of tokens in the response
+   * Random seed for reproducible outputs. Same seed + same inputs = same output.
+   * Default: -1 (random), Range: any integer
+   */
+  seed?: number;
+
+  /**
+   * Maximum number of tokens to generate in the response. Controls response length.
+   * Default: 128, Range: 1-4096+ (model dependent)
    */
   num_predict?: number;
-  top_k?: number;
-  top_p?: number;
-  min_p?: number;
-  typical_p?: number;
-  repeat_last_n?: number;
+
   /**
-   * How close of a response should the response be to the original prompt - lower = more focused response
+   * Limits token selection to top K most probable tokens. Reduces randomness.
+   * Default: 40, Range: 1-100 (higher = more diverse)
+   */
+  top_k?: number;
+
+  /**
+   * Nucleus sampling - cumulative probability cutoff for token selection.
+   * Default: 0.9, Range: 0.0-1.0 (lower = more focused)
+   */
+  top_p?: number;
+
+  /**
+   * Alternative to top_p - minimum probability threshold for tokens.
+   * Default: 0.0, Range: 0.0-1.0 (higher = more selective)
+   */
+  min_p?: number;
+
+  /**
+   * Typical sampling - targets tokens with "typical" probability mass.
+   * Default: 1.0 (disabled), Range: 0.0-1.0 (lower = less random)
+   */
+  typical_p?: number;
+
+  /**
+   * Number of previous tokens to consider for repetition penalty.
+   * Default: 64, Range: 0-512
+   */
+  repeat_last_n?: number;
+
+  /**
+   * Randomness/creativity control. Lower = more deterministic, higher = more creative.
+   * Default: 0.8, Range: 0.0-2.0 (sweet spot: 0.1-1.2)
    */
   temperature?: number;
-  repeat_penalty?: number;
-  presence_penalty?: number;
-  frequency_penalty?: number;
-  mirostat?: number;
-  mirostat_tau?: number;
-  mirostat_eta?: number;
-  penalize_newline?: boolean;
-  stop?: string[];
-  numa?: boolean;
+
   /**
-   * Number of tokens for the prompt to keep in memory for the response, minus the value of num_keep
+   * Penalty for repeating tokens. Higher values reduce repetition.
+   * Default: 1.1, Range: 0.0-2.0 (1.0 = no penalty)
+   */
+  repeat_penalty?: number;
+
+  /**
+   * Penalty for using tokens that have already appeared (OpenAI-style).
+   * Default: 0.0, Range: -2.0 to 2.0
+   */
+  presence_penalty?: number;
+
+  /**
+   * Penalty proportional to token frequency in text (OpenAI-style).
+   * Default: 0.0, Range: -2.0 to 2.0
+   */
+  frequency_penalty?: number;
+
+  /**
+   * Enables Mirostat sampling algorithm (0=disabled, 1=v1, 2=v2).
+   * Default: 0, Range: 0, 1, or 2
+   */
+  mirostat?: number;
+
+  /**
+   * Target entropy for Mirostat. Controls coherence vs creativity balance.
+   * Default: 5.0, Range: 0.0-10.0
+   */
+  mirostat_tau?: number;
+
+  /**
+   * Learning rate for Mirostat. How quickly it adapts.
+   * Default: 0.1, Range: 0.001-1.0
+   */
+  mirostat_eta?: number;
+
+  /**
+   * Apply penalty to newline tokens to control formatting.
+   * Default: true
+   */
+  penalize_newline?: boolean;
+
+  /**
+   * Array of strings that will stop generation when encountered.
+   * Default: [], Example: ["\n", "User:", "###"]
+   */
+  stop?: string[];
+
+  /**
+   * Enable NUMA (Non-Uniform Memory Access) optimization.
+   * Default: false (Linux systems may benefit from true)
+   */
+  numa?: boolean;
+
+  /**
+   * Context window size - total tokens for prompt + response.
+   * Default: 2048, Range: 512-32768+ (model dependent, affects memory usage)
    */
   num_ctx?: number;
+
+  /**
+   * Batch size for prompt processing. Higher = faster but more memory.
+   * Default: 512, Range: 1-2048
+   */
   num_batch?: number;
+
+  /**
+   * Number of GPU layers to offload. -1 = auto, 0 = CPU only.
+   * Default: -1, Range: -1 to model layer count
+   */
   num_gpu?: number;
+
+  /**
+   * Primary GPU device ID for multi-GPU setups.
+   * Default: 0, Range: 0 to (GPU count - 1)
+   */
   main_gpu?: number;
+
+  /**
+   * Optimize for low VRAM usage at cost of speed.
+   * Default: false
+   */
   low_vram?: boolean;
+
+  /**
+   * Only load vocabulary, skip weights. For tokenization only.
+   * Default: false
+   */
   vocab_only?: boolean;
+
+  /**
+   * Use memory mapping for model files (faster loading).
+   * Default: true
+   */
   use_mmap?: boolean;
+
+  /**
+   * Lock model in memory to prevent swapping.
+   * Default: false (enable for consistent performance)
+   */
   use_mlock?: boolean;
+
+  /**
+   * Number of CPU threads for inference.
+   * Default: auto-detected, Range: 1 to CPU core count
+   */
   num_thread?: number;
 }
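As a usage sketch, a config assembled from the documented options. Values are illustrative only, and the import path mirrors how src/main.ts references these types, which is an assumption here:

import { OllamaConfigOptions } from "../types.js"; // path assumed, as in src/main.ts

const exampleConfig: OllamaConfigOptions = {
  temperature: 0.8,    // default creativity
  top_k: 40,           // trim the candidate token pool
  top_p: 0.9,          // nucleus sampling cutoff
  repeat_penalty: 1.1, // mild anti-repetition
  num_ctx: 4096,       // prompt + response token budget
  num_predict: 256,    // cap response length
  stop: ["User:"],     // halt if the model starts a fake turn
};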