Added emote reaction support with improved emote handling in reactions, implemented LLM refusal detection and retry logic, and improved some inline documentation
This commit is contained in:
		| @ -28,3 +28,14 @@ model User { | ||||
|   userFqn         String    @unique | ||||
|   lastRespondedTo DateTime? | ||||
| } | ||||
|  | ||||
// Tracks every emoji reaction the bot has made, so a status is never
// reacted to more than once.
model Reaction {
  id        Int      @id @default(autoincrement())
  statusId  String // The Pleroma status ID we reacted to
  emojiName String // The emoji we used to react
  reactedAt DateTime @default(now())
  createdAt DateTime @default(now())

  @@unique([statusId]) // Prevent multiple reactions to same status
  @@map("reactions")
}
							
								
								
									
										177
									
								
								src/api.ts
									
									
									
									
									
								
							
							
						
						
									
										177
									
								
								src/api.ts
									
									
									
									
									
								
							| @ -1,5 +1,7 @@ | ||||
| import { envConfig, prisma } from "./main.js"; | ||||
| import { PleromaEmoji, Notification, ContextResponse } from "../types.js"; | ||||
| import { selectRandomEmojis } from "./util.js"; | ||||
|  | ||||
|  | ||||
| const getNotifications = async () => { | ||||
|   const { bearerToken, pleromaInstanceUrl } = envConfig; | ||||
| @ -98,9 +100,184 @@ const deleteNotification = async (notification: Notification) => { | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * React to a status with a random emoji | ||||
|  */ | ||||
| const reactToStatus = async (statusId: string, emojiName: string): Promise<boolean> => { | ||||
|   const { bearerToken, pleromaInstanceUrl } = envConfig; | ||||
|    | ||||
|   try { | ||||
|     const response = await fetch( | ||||
|       `${pleromaInstanceUrl}/api/v1/statuses/${statusId}/react/${emojiName}`, | ||||
|       { | ||||
|         method: "PUT", | ||||
|         headers: { | ||||
|           Authorization: `Bearer ${bearerToken}`, | ||||
|           "Content-Type": "application/json", | ||||
|         }, | ||||
|       } | ||||
|     ); | ||||
|  | ||||
|     if (!response.ok) { | ||||
|       console.error(`Failed to react to status ${statusId}: ${response.status} - ${response.statusText}`); | ||||
|       return false; | ||||
|     } | ||||
|  | ||||
|     return true; | ||||
|   } catch (error: any) { | ||||
|     console.error(`Error reacting to status ${statusId}: ${error.message}`); | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Check if we've already reacted to a status | ||||
|  */ | ||||
| const hasAlreadyReacted = async (statusId: string): Promise<boolean> => { | ||||
|   try { | ||||
|     const reaction = await prisma.reaction.findFirst({ | ||||
|       where: { statusId: statusId }, | ||||
|     }); | ||||
|     return !!reaction; | ||||
|   } catch (error: any) { | ||||
|     console.error(`Error checking reaction status: ${error.message}`); | ||||
|     return true; // Assume we've reacted to avoid spamming on error | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Record that we've reacted to a status | ||||
|  */ | ||||
| const recordReaction = async (statusId: string, emojiName: string): Promise<void> => { | ||||
|   try { | ||||
|     await prisma.reaction.create({ | ||||
|       data: { | ||||
|         statusId: statusId, | ||||
|         emojiName: emojiName, | ||||
|         reactedAt: new Date(), | ||||
|       }, | ||||
|     }); | ||||
|   } catch (error: any) { | ||||
|     console.error(`Error recording reaction: ${error.message}`); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Decide whether to react to a post (not every post gets a reaction) | ||||
|  */ | ||||
| const shouldReactToPost = (): boolean => { | ||||
|   // React to roughly 30% of posts | ||||
|   return Math.random() < 0.3; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Get appropriate reaction emojis based on content sentiment/keywords | ||||
|  */ | ||||
| const getContextualEmoji = (content: string, availableEmojis: string[]): string => { | ||||
|   const contentLower = content.toLowerCase(); | ||||
|    | ||||
|   // Define emoji categories with keywords | ||||
|   const emojiCategories = { | ||||
|     positive: ['happy', 'smile', 'joy', 'love', 'heart', 'thumbsup', 'fire', 'based'], | ||||
|     negative: ['sad', 'cry', 'angry', 'rage', 'disappointed', 'cringe'], | ||||
|     thinking: ['think', 'hmm', 'brain', 'smart', 'curious'], | ||||
|     laughing: ['laugh', 'lol', 'kek', 'funny', 'haha', 'rofl'], | ||||
|     agreement: ['yes', 'agree', 'nod', 'correct', 'true', 'based'], | ||||
|     surprise: ['wow', 'amazing', 'surprised', 'shock', 'omg'], | ||||
|   }; | ||||
|  | ||||
|   // Keywords that might indicate sentiment | ||||
|   const sentimentKeywords = { | ||||
|     positive: ['good', 'great', 'awesome', 'nice', 'love', 'happy', 'excellent', 'perfect'], | ||||
|     negative: ['bad', 'terrible', 'hate', 'awful', 'horrible', 'worst', 'sucks'], | ||||
|     funny: ['lol', 'haha', 'funny', 'hilarious', 'joke', 'meme'], | ||||
|     question: ['?', 'what', 'how', 'why', 'when', 'where'], | ||||
|     agreement: ['yes', 'exactly', 'true', 'right', 'correct', 'agree'], | ||||
|     thinking: ['think', 'consider', 'maybe', 'perhaps', 'hmm', 'interesting'], | ||||
|   }; | ||||
|  | ||||
|   // Check content sentiment and find matching emojis | ||||
|   for (const [sentiment, keywords] of Object.entries(sentimentKeywords)) { | ||||
|     if (keywords.some(keyword => contentLower.includes(keyword))) { | ||||
|       const categoryEmojis = emojiCategories[sentiment as keyof typeof emojiCategories]; | ||||
|       if (categoryEmojis) { | ||||
|         const matchingEmojis = availableEmojis.filter(emoji =>  | ||||
|           categoryEmojis.some(cat => emoji.toLowerCase().includes(cat)) | ||||
|         ); | ||||
|         if (matchingEmojis.length > 0) { | ||||
|           return matchingEmojis[Math.floor(Math.random() * matchingEmojis.length)]; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Fallback to random emoji from a curated list of common reactions | ||||
|   const commonReactions = availableEmojis.filter(emoji =>  | ||||
|     ['heart', 'thumbsup', 'fire', 'kek', 'based', 'think', 'smile', 'laugh'] | ||||
|       .some(common => emoji.toLowerCase().includes(common)) | ||||
|   ); | ||||
|  | ||||
|   if (commonReactions.length > 0) { | ||||
|     return commonReactions[Math.floor(Math.random() * commonReactions.length)]; | ||||
|   } | ||||
|  | ||||
|   // Final fallback to any random emoji | ||||
|   return availableEmojis[Math.floor(Math.random() * availableEmojis.length)]; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Main function to handle post reactions | ||||
|  */ | ||||
| const handlePostReaction = async (notification: Notification): Promise<void> => { | ||||
|   try { | ||||
|     const statusId = notification.status.id; | ||||
|      | ||||
|     // Check if we should react to this post | ||||
|     if (!shouldReactToPost()) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Check if we've already reacted | ||||
|     if (await hasAlreadyReacted(statusId)) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Get available emojis | ||||
|     const emojiList = await getInstanceEmojis(); | ||||
|     if (!emojiList || emojiList.length === 0) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Select a smaller random pool for reactions (5-10 emojis) | ||||
|     const reactionPool = selectRandomEmojis(emojiList, 8); | ||||
|      | ||||
|     // Get contextual emoji based on post content | ||||
|     const selectedEmoji = getContextualEmoji( | ||||
|       notification.status.pleroma.content["text/plain"],  | ||||
|       reactionPool | ||||
|     ); | ||||
|  | ||||
|     // React to the post | ||||
|     const success = await reactToStatus(statusId, selectedEmoji); | ||||
|      | ||||
|     if (success) { | ||||
|       await recordReaction(statusId, selectedEmoji); | ||||
|       console.log(`Reacted to status ${statusId} with :${selectedEmoji}:`); | ||||
|     } | ||||
|  | ||||
|   } catch (error: any) { | ||||
|     console.error(`Error handling post reaction: ${error.message}`); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| export { | ||||
|   deleteNotification, | ||||
|   getInstanceEmojis, | ||||
|   getNotifications, | ||||
|   getStatusContext, | ||||
|   reactToStatus, | ||||
|   handlePostReaction, | ||||
|   hasAlreadyReacted, | ||||
| }; | ||||
|  | ||||
							
								
								
									
										65
									
								
								src/main.ts
									
									
									
									
									
								
							
							
						
						
									
										65
									
								
								src/main.ts
									
									
									
									
									
								
							| @ -13,6 +13,7 @@ import { | ||||
|   deleteNotification, | ||||
|   getNotifications, | ||||
|   getStatusContext, | ||||
|   handlePostReaction, | ||||
| } from "./api.js"; | ||||
| import { storeUserData, storePromptData } from "./prisma.js"; | ||||
| import { | ||||
| @ -20,7 +21,9 @@ import { | ||||
|   alreadyRespondedTo, | ||||
|   recordPendingResponse, | ||||
|   // trimInputData, | ||||
|   selectRandomEmoji, | ||||
|   // selectRandomEmoji, | ||||
|   selectRandomEmojis, | ||||
|   isLLMRefusal, | ||||
|   shouldContinue, | ||||
| } from "./util.js"; | ||||
|  | ||||
| @ -59,7 +62,8 @@ const ollamaConfig: OllamaConfigOptions = { | ||||
| // https://replicate.com/blog/how-to-prompt-llama | ||||
|  | ||||
| const generateOllamaRequest = async ( | ||||
|   notification: Notification | ||||
|   notification: Notification, | ||||
|   retryAttempt: number = 0 | ||||
| ): Promise<OllamaChatResponse | undefined> => { | ||||
|   const { | ||||
|     whitelistOnly, | ||||
| @ -68,6 +72,7 @@ const generateOllamaRequest = async ( | ||||
|     ollamaUrl, | ||||
|     replyWithContext, | ||||
|   } = envConfig; | ||||
|    | ||||
|   try { | ||||
|     if (shouldContinue(notification)) { | ||||
|       if (whitelistOnly && !isFromWhitelistedDomain(notification)) { | ||||
| @ -79,6 +84,7 @@ const generateOllamaRequest = async ( | ||||
|       } | ||||
|       await recordPendingResponse(notification); | ||||
|       await storeUserData(notification); | ||||
|        | ||||
|       let conversationHistory: PostAncestorsForModel[] = []; | ||||
|       if (replyWithContext) { | ||||
|         const contextPosts = await getStatusContext(notification.status.id); | ||||
| @ -93,15 +99,20 @@ const generateOllamaRequest = async ( | ||||
|             plaintext_content: ancestor.pleroma.content["text/plain"], | ||||
|           }; | ||||
|         }); | ||||
|         // console.log(conversationHistory); | ||||
|       } | ||||
|  | ||||
|       // Simplified user message (remove [/INST] as it's not needed for Llama 3) | ||||
|       const userMessage = `${notification.status.account.fqn} says: ${notification.status.pleroma.content["text/plain"]}`; | ||||
|  | ||||
|       let systemContent = ollamaSystemPrompt; | ||||
|       // Get random emojis for this request | ||||
|       const emojiList = await getInstanceEmojis(); | ||||
|       let availableEmojis = ""; | ||||
|       if (emojiList && emojiList.length > 0) { | ||||
|         const randomEmojis = selectRandomEmojis(emojiList, 20); | ||||
|         availableEmojis = `\n\nAvailable custom emojis you can use in your response (or use none!) (format as :emoji_name:): ${randomEmojis.join(", ")}`; | ||||
|       } | ||||
|  | ||||
|       let systemContent = ollamaSystemPrompt + availableEmojis; | ||||
|       if (replyWithContext) { | ||||
|         // Simplified context instructions (avoid heavy JSON; summarize for clarity) | ||||
|         systemContent = `${ollamaSystemPrompt}\n\nPrevious conversation context:\n${conversationHistory | ||||
|           .map( | ||||
|             (post) => | ||||
| @ -111,10 +122,15 @@ const generateOllamaRequest = async ( | ||||
|           ) | ||||
|           .join( | ||||
|             "\n" | ||||
|           )}\nReply as if you are a party to the conversation. If '@nice-ai' is mentioned, respond directly. Prefix usernames with '@' when addressing them.`; | ||||
|           )}\nReply as if you are a party to the conversation. If '@nice-ai' is mentioned, respond directly. Prefix usernames with '@' when addressing them.${availableEmojis}`; | ||||
|       } | ||||
|  | ||||
|       // Switch to chat request format (messages array auto-handles Llama 3 template) | ||||
|       // Use different seeds for retry attempts | ||||
|       const currentConfig = { | ||||
|         ...ollamaConfig, | ||||
|         seed: retryAttempt > 0 ? Math.floor(Math.random() * 1000000) : ollamaConfig.seed, | ||||
|       }; | ||||
|  | ||||
|       const ollamaRequestBody: OllamaChatRequest = { | ||||
|         model: ollamaModel, | ||||
|         messages: [ | ||||
| @ -122,16 +138,21 @@ const generateOllamaRequest = async ( | ||||
|           { role: "user", content: userMessage }, | ||||
|         ], | ||||
|         stream: false, | ||||
|         options: ollamaConfig, | ||||
|         options: currentConfig, | ||||
|       }; | ||||
|  | ||||
|       // Change endpoint to /api/chat | ||||
|       const response = await fetch(`${ollamaUrl}/api/chat`, { | ||||
|         method: "POST", | ||||
|         body: JSON.stringify(ollamaRequestBody), | ||||
|       }); | ||||
|       const ollamaResponse: OllamaChatResponse = await response.json(); | ||||
|  | ||||
|       // Check for refusal and retry up to 2 times | ||||
|       if (isLLMRefusal(ollamaResponse.message.content) && retryAttempt < 2) { | ||||
|         console.log(`LLM refused to answer (attempt ${retryAttempt + 1}), retrying with different seed...`); | ||||
|         return generateOllamaRequest(notification, retryAttempt + 1); | ||||
|       } | ||||
|  | ||||
|       await storePromptData(notification, ollamaResponse); | ||||
|       return ollamaResponse; | ||||
|     } | ||||
| @ -145,16 +166,11 @@ const postReplyToStatus = async ( | ||||
|   ollamaResponseBody: OllamaChatResponse | ||||
| ) => { | ||||
|   const { pleromaInstanceUrl, bearerToken } = envConfig; | ||||
|   const emojiList = await getInstanceEmojis(); | ||||
|   let randomEmoji; | ||||
|   if (emojiList) { | ||||
|     randomEmoji = selectRandomEmoji(emojiList); | ||||
|   } | ||||
|   try { | ||||
|     let mentions: string[]; | ||||
|     const statusBody: NewStatusBody = { | ||||
|       content_type: "text/markdown", | ||||
|       status: `${ollamaResponseBody.message.content} :${randomEmoji}:`, | ||||
|       status: ollamaResponseBody.message.content, | ||||
|       in_reply_to_id: notification.status.id, | ||||
|     }; | ||||
|     if ( | ||||
| @ -247,17 +263,28 @@ const beginFetchCycle = async () => { | ||||
|       await Promise.all( | ||||
|         notifications.map(async (notification) => { | ||||
|           try { | ||||
|             // Handle reactions first (before generating response) | ||||
|             // This way we can react even if response generation fails | ||||
|             await handlePostReaction(notification); | ||||
|              | ||||
|             // Then handle the response generation as before | ||||
|             const ollamaResponse = await generateOllamaRequest(notification); | ||||
|             if (ollamaResponse) { | ||||
|               postReplyToStatus(notification, ollamaResponse); | ||||
|               await postReplyToStatus(notification, ollamaResponse); | ||||
|             } | ||||
|           } catch (error: any) { | ||||
|             throw new Error(error.message); | ||||
|             console.error(`Error processing notification ${notification.id}: ${error.message}`); | ||||
|             // Still try to delete the notification to avoid getting stuck | ||||
|             try { | ||||
|               await deleteNotification(notification); | ||||
|             } catch (deleteError: any) { | ||||
|               console.error(`Failed to delete notification: ${deleteError.message}`); | ||||
|             } | ||||
|           } | ||||
|         }) | ||||
|       ); | ||||
|     } | ||||
|   }, envConfig.fetchInterval); // lower intervals may cause the bot to respond multiple times to the same message, but we try to mitigate this with the deleteNotification function | ||||
|   }, envConfig.fetchInterval); | ||||
| }; | ||||
|  | ||||
| const beginStatusPostInterval = async () => { | ||||
|  | ||||
							
								
								
									
										38
									
								
								src/util.ts
									
									
									
									
									
								
							
							
						
						
									
										38
									
								
								src/util.ts
									
									
									
									
									
								
							| @ -98,9 +98,47 @@ const selectRandomEmoji = (emojiList: string[]) => { | ||||
|   return emojiList[Math.floor(Math.random() * emojiList.length)]; | ||||
| }; | ||||
|  | ||||
| const selectRandomEmojis = (emojiList: string[], count: number = 20): string[] => { | ||||
|   if (emojiList.length <= count) return emojiList; | ||||
|    | ||||
|   const shuffled = [...emojiList].sort(() => 0.5 - Math.random()); | ||||
|   return shuffled.slice(0, count); | ||||
| }; | ||||
|  | ||||
| const isLLMRefusal = (response: string): boolean => { | ||||
|   const refusalPatterns = [ | ||||
|     /i can't|i cannot|unable to|i'm not able to/i, | ||||
|     /i don't feel comfortable/i, | ||||
|     /i'm not comfortable/i, | ||||
|     /i shouldn't|i won't/i, | ||||
|     /that's not something i can/i, | ||||
|     /i'm not programmed to/i, | ||||
|     /i'm an ai (assistant|language model)/i, | ||||
|     /as an ai/i, | ||||
|     /i apologize, but/i, | ||||
|     /i must decline/i, | ||||
|     /that would be inappropriate/i, | ||||
|     /i'm not supposed to/i, | ||||
|     /i'd rather not/i, | ||||
|     /i prefer not to/i, | ||||
|     /against my guidelines/i, | ||||
|     /violates my programming/i, | ||||
|   ]; | ||||
|  | ||||
|   const normalizedResponse = response.toLowerCase().trim(); | ||||
|    | ||||
|   // Check if response is too short (likely a refusal) | ||||
|   if (normalizedResponse.length < 20) return true; | ||||
|    | ||||
|   // Check for refusal patterns | ||||
|   return refusalPatterns.some(pattern => pattern.test(normalizedResponse)); | ||||
| }; | ||||
|  | ||||
| export { | ||||
|   alreadyRespondedTo, | ||||
|   selectRandomEmoji, | ||||
|   selectRandomEmojis, | ||||
|   isLLMRefusal, | ||||
|   trimInputData, | ||||
|   recordPendingResponse, | ||||
|   isFromWhitelistedDomain, | ||||
|  | ||||
							
								
								
									
										174
									
								
								types.d.ts
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										174
									
								
								types.d.ts
									
									
									
									
										vendored
									
									
								
							| @ -158,48 +158,182 @@ interface PleromaEmojiMetadata { | ||||
|   tags: string[]; | ||||
| } | ||||
|  | ||||
| interface ReactionRequest { | ||||
|   name: string; // emoji name without colons | ||||
| } | ||||
|  | ||||
| interface ReactionResponse { | ||||
|   name: string; | ||||
|   count: number; | ||||
|   me: boolean; | ||||
|   url?: string; | ||||
|   static_url?: string; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Experimental settings, I wouldn't recommend messing with these if you don't know how they work (I don't either) | ||||
|  */ | ||||
| export interface OllamaConfigOptions { | ||||
|   /** | ||||
|    * Number of tokens guaranteed to be kept in memory during response generation. Higher values leave less | ||||
|    * possible room for num_ctx | ||||
|    * Number of tokens guaranteed to be kept in memory during response generation.  | ||||
|    * Higher values leave less room for num_ctx. Used to preserve important context. | ||||
|    * Default: 0, Range: 0-512 | ||||
|    */ | ||||
|   num_keep?: number; | ||||
|   seed?: number; | ||||
|  | ||||
|   /** | ||||
|    * Sets maximum of tokens in the response | ||||
|    * Random seed for reproducible outputs. Same seed + same inputs = same output. | ||||
|    * Default: -1 (random), Range: any integer | ||||
|    */ | ||||
|   seed?: number; | ||||
|  | ||||
|   /** | ||||
|    * Maximum number of tokens to generate in the response. Controls response length. | ||||
|    * Default: 128, Range: 1-4096+ (model dependent) | ||||
|    */ | ||||
|   num_predict?: number; | ||||
|   top_k?: number; | ||||
|   top_p?: number; | ||||
|   min_p?: number; | ||||
|   typical_p?: number; | ||||
|   repeat_last_n?: number; | ||||
|  | ||||
|   /** | ||||
|    * How close of a response should the response be to the original prompt - lower = more focused response | ||||
|    * Limits token selection to top K most probable tokens. Reduces randomness. | ||||
|    * Default: 40, Range: 1-100 (higher = more diverse) | ||||
|    */ | ||||
|   top_k?: number; | ||||
|  | ||||
|   /** | ||||
|    * Nucleus sampling - cumulative probability cutoff for token selection. | ||||
|    * Default: 0.9, Range: 0.0-1.0 (lower = more focused) | ||||
|    */ | ||||
|   top_p?: number; | ||||
|  | ||||
|   /** | ||||
|    * Alternative to top_p - minimum probability threshold for tokens. | ||||
|    * Default: 0.0, Range: 0.0-1.0 (higher = more selective) | ||||
|    */ | ||||
|   min_p?: number; | ||||
|  | ||||
|   /** | ||||
|    * Typical sampling - targets tokens with "typical" probability mass. | ||||
|    * Default: 1.0 (disabled), Range: 0.0-1.0 (lower = less random) | ||||
|    */ | ||||
|   typical_p?: number; | ||||
|  | ||||
|   /** | ||||
|    * Number of previous tokens to consider for repetition penalty. | ||||
|    * Default: 64, Range: 0-512 | ||||
|    */ | ||||
|   repeat_last_n?: number; | ||||
|  | ||||
|   /** | ||||
|    * Randomness/creativity control. Lower = more deterministic, higher = more creative. | ||||
|    * Default: 0.8, Range: 0.0-2.0 (sweet spot: 0.1-1.2) | ||||
|    */ | ||||
|   temperature?: number; | ||||
|   repeat_penalty?: number; | ||||
|   presence_penalty?: number; | ||||
|   frequency_penalty?: number; | ||||
|   mirostat?: number; | ||||
|   mirostat_tau?: number; | ||||
|   mirostat_eta?: number; | ||||
|   penalize_newline?: boolean; | ||||
|   stop?: string[]; | ||||
|   numa?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Number of tokens for the prompt to keep in memory for the response, minus the value of num_keep | ||||
|    * Penalty for repeating tokens. Higher values reduce repetition. | ||||
|    * Default: 1.1, Range: 0.0-2.0 (1.0 = no penalty) | ||||
|    */ | ||||
|   repeat_penalty?: number; | ||||
|  | ||||
|   /** | ||||
|    * Penalty for using tokens that have already appeared (OpenAI-style). | ||||
|    * Default: 0.0, Range: -2.0 to 2.0 | ||||
|    */ | ||||
|   presence_penalty?: number; | ||||
|  | ||||
|   /** | ||||
|    * Penalty proportional to token frequency in text (OpenAI-style). | ||||
|    * Default: 0.0, Range: -2.0 to 2.0 | ||||
|    */ | ||||
|   frequency_penalty?: number; | ||||
|  | ||||
|   /** | ||||
|    * Enables Mirostat sampling algorithm (0=disabled, 1=v1, 2=v2). | ||||
|    * Default: 0, Range: 0, 1, or 2 | ||||
|    */ | ||||
|   mirostat?: number; | ||||
|  | ||||
|   /** | ||||
|    * Target entropy for Mirostat. Controls coherence vs creativity balance. | ||||
|    * Default: 5.0, Range: 0.0-10.0 | ||||
|    */ | ||||
|   mirostat_tau?: number; | ||||
|  | ||||
|   /** | ||||
|    * Learning rate for Mirostat. How quickly it adapts. | ||||
|    * Default: 0.1, Range: 0.001-1.0 | ||||
|    */ | ||||
|   mirostat_eta?: number; | ||||
|  | ||||
|   /** | ||||
|    * Apply penalty to newline tokens to control formatting. | ||||
|    * Default: true | ||||
|    */ | ||||
|   penalize_newline?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Array of strings that will stop generation when encountered. | ||||
|    * Default: [], Example: ["\n", "User:", "###"] | ||||
|    */ | ||||
|   stop?: string[]; | ||||
|  | ||||
|   /** | ||||
|    * Enable NUMA (Non-Uniform Memory Access) optimization. | ||||
|    * Default: false (Linux systems may benefit from true) | ||||
|    */ | ||||
|   numa?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Context window size - total tokens for prompt + response. | ||||
|    * Default: 2048, Range: 512-32768+ (model dependent, affects memory usage) | ||||
|    */ | ||||
|   num_ctx?: number; | ||||
|  | ||||
|   /** | ||||
|    * Batch size for prompt processing. Higher = faster but more memory. | ||||
|    * Default: 512, Range: 1-2048 | ||||
|    */ | ||||
|   num_batch?: number; | ||||
|  | ||||
|   /** | ||||
|    * Number of GPU layers to offload. -1 = auto, 0 = CPU only. | ||||
|    * Default: -1, Range: -1 to model layer count | ||||
|    */ | ||||
|   num_gpu?: number; | ||||
|  | ||||
|   /** | ||||
|    * Primary GPU device ID for multi-GPU setups. | ||||
|    * Default: 0, Range: 0 to (GPU count - 1) | ||||
|    */ | ||||
|   main_gpu?: number; | ||||
|  | ||||
|   /** | ||||
|    * Optimize for low VRAM usage at cost of speed. | ||||
|    * Default: false | ||||
|    */ | ||||
|   low_vram?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Only load vocabulary, skip weights. For tokenization only. | ||||
|    * Default: false | ||||
|    */ | ||||
|   vocab_only?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Use memory mapping for model files (faster loading). | ||||
|    * Default: true | ||||
|    */ | ||||
|   use_mmap?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Lock model in memory to prevent swapping. | ||||
|    * Default: false (enable for consistent performance) | ||||
|    */ | ||||
|   use_mlock?: boolean; | ||||
|  | ||||
|   /** | ||||
|    * Number of CPU threads for inference. | ||||
|    * Default: auto-detected, Range: 1 to CPU core count | ||||
|    */ | ||||
|   num_thread?: number; | ||||
| } | ||||
|  | ||||
		Reference in New Issue
	
	Block a user