export interface Notification {
  account: Account;
  status: Status;
  id: string;
  type: string;
  created_at: string;
}

export interface ContextResponse {
  ancestors: ContextObject[];
  descendants: ContextObject[];
}

export interface PostAncestorsForModel {
  account_fqn: string;
  mentions: string[];
  plaintext_content: string;
}

interface ContextAccountObject {
  acct: string;
  avatar: string;
  bot: boolean;
  display_name: string;
  followers_count: number;
  following_count: number;
  fqn: string;
  id: string;
}

export interface ContextObject {
  content: string;
  id: string;
  in_reply_to_account_id: string | null;
  in_reply_to_id: string | null;
  media_attachments: string[];
  mentions: Mention[];
  pleroma: PleromaObjectInResponse;
  visibility: "public" | "private" | "unlisted";
  uri: string;
  account: ContextAccountObject;
}

export interface NewStatusBody {
  content_type: "application/json" | "text/markdown";
  in_reply_to_id?: string;
  media_ids?: string[];
  sensitive?: "true" | "false" | boolean;
  status: string;
  to?: string[];
}

export interface Account {
  acct: string; // nickname
  bot: boolean;
  display_name: string;
  fqn: string; // user@instance.tld
  id: string; // user ID
  note?: string; // bio
}

export interface OllamaRequest {
  /**
   * Name of the Ollama model to generate a response from. Must be a valid and locally installed model.
   */
  model: string;
  /**
   * The prompt sent from the end-user.
   */
  prompt: string;
  /**
   * Optional system prompt to make the model more unique, or to force it to respond a certain way.
   */
  system?: string;
  /**
   * Whether to stream responses from the API, or have the response sent as one payload.
   * Defaults to false.
   */
  stream?: boolean;
  /**
   * Ollama configuration options
   */
  options?: OllamaConfigOptions;
}

export interface OllamaChatRequest {
  model: string;
  messages: OllamaMessages[];
  /** Defaults to false. */
  stream?: boolean;
  options?: OllamaConfigOptions;
}

export interface OllamaChatResponse {
  model: string;
  created_at: string;
  message: OllamaChatResponseMessage;
  done_reason: string;
  done: boolean;
  total_duration: number;
  load_duration: number;
  prompt_eval_count: number;
  prompt_eval_duration: number;
  eval_count: number;
  eval_duration: number;
}

interface OllamaChatResponseMessage {
  role: "assistant";
  content: string;
}

interface OllamaMessages {
  role: "system" | "user";
  content: string;
}

export interface OllamaResponse {
  model: string;
  created_at: Date | string;
  response: string;
  done: boolean;
  done_reason: string;
}

export interface Status {
  account: Account;
  content: string; // content of the post
  created_at: string | Date; // when the post was created
  id: string; // ID of the status itself
  in_reply_to_account_id: string; // ID of the account being replied to
  in_reply_to_id: string; // ID of the status being replied to
  mentions: Mention[]; // array of mentions
  pleroma: PleromaObjectInResponse;
  visibility: "private" | "public" | "unlisted";
}

interface PleromaObjectInResponse {
  content: { "text/plain": string };
  context: string;
  conversation_id: number;
  direct_conversation_id: number | null;
  local: boolean;
  in_reply_to_account_acct: string;
}

export interface Mention {
  acct: string;
  id: string;
  url: string;
  username: string;
}

export interface PleromaEmoji {
  [emojiName: string]: PleromaEmojiMetadata;
}

interface PleromaEmojiMetadata {
  image_url: string;
  tags: string[];
}

interface ReactionRequest {
  name: string; // emoji name without colons
}

interface ReactionResponse {
  name: string;
  count: number;
  me: boolean;
  url?: string;
  static_url?: string;
}
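/**
 * Example (illustrative sketch only): how the Ollama chat types above can be wired
 * together. The endpoint URL is Ollama's default; the model name "llama3", the option
 * values, and the replyToId helper are assumptions for demonstration, not settings
 * used by this project.
 *
 * @example
 * // Build a non-streaming chat request from an incoming mention.
 * const chatRequest: OllamaChatRequest = {
 *   model: "llama3",
 *   messages: [
 *     { role: "system", content: "You are a friendly fediverse bot." },
 *     { role: "user", content: "@bot hello there!" },
 *   ],
 *   stream: false,
 *   options: { temperature: 0.8, num_ctx: 4096 },
 * };
 *
 * const res = await fetch("http://localhost:11434/api/chat", {
 *   method: "POST",
 *   headers: { "Content-Type": "application/json" },
 *   body: JSON.stringify(chatRequest),
 * });
 * const chatResponse = (await res.json()) as OllamaChatResponse;
 *
 * // Turn the generated text into a reply payload (replyToId is the id of the
 * // Status being answered).
 * const buildReply = (replyToId: string): NewStatusBody => ({
 *   content_type: "text/markdown",
 *   status: chatResponse.message.content,
 *   in_reply_to_id: replyToId,
 * });
 */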
/**
 * Experimental settings, I wouldn't recommend messing with these if you
 * don't know how they work (I don't either)
 */
export interface OllamaConfigOptions {
  /**
   * Number of tokens guaranteed to be kept in memory during response generation.
   * Higher values leave less room for num_ctx. Used to preserve important context.
   * Default: 0, Range: 0-512
   */
  num_keep?: number;
  /**
   * Random seed for reproducible outputs. Same seed + same inputs = same output.
   * Default: -1 (random), Range: any integer
   */
  seed?: number;
  /**
   * Maximum number of tokens to generate in the response. Controls response length.
   * Default: 128, Range: 1-4096+ (model dependent)
   */
  num_predict?: number;
  /**
   * Limits token selection to top K most probable tokens. Reduces randomness.
   * Default: 40, Range: 1-100 (higher = more diverse)
   */
  top_k?: number;
  /**
   * Nucleus sampling - cumulative probability cutoff for token selection.
   * Default: 0.9, Range: 0.0-1.0 (lower = more focused)
   */
  top_p?: number;
  /**
   * Alternative to top_p - minimum probability threshold for tokens.
   * Default: 0.0, Range: 0.0-1.0 (higher = more selective)
   */
  min_p?: number;
  /**
   * Typical sampling - targets tokens with "typical" probability mass.
   * Default: 1.0 (disabled), Range: 0.0-1.0 (lower = less random)
   */
  typical_p?: number;
  /**
   * Number of previous tokens to consider for repetition penalty.
   * Default: 64, Range: 0-512
   */
  repeat_last_n?: number;
  /**
   * Randomness/creativity control. Lower = more deterministic, higher = more creative.
   * Default: 0.8, Range: 0.0-2.0 (sweet spot: 0.1-1.2)
   */
  temperature?: number;
  /**
   * Penalty for repeating tokens. Higher values reduce repetition.
   * Default: 1.1, Range: 0.0-2.0 (1.0 = no penalty)
   */
  repeat_penalty?: number;
  /**
   * Penalty for using tokens that have already appeared (OpenAI-style).
   * Default: 0.0, Range: -2.0 to 2.0
   */
  presence_penalty?: number;
  /**
   * Penalty proportional to token frequency in text (OpenAI-style).
   * Default: 0.0, Range: -2.0 to 2.0
   */
  frequency_penalty?: number;
  /**
   * Enables Mirostat sampling algorithm (0=disabled, 1=v1, 2=v2).
   * Default: 0, Range: 0, 1, or 2
   */
  mirostat?: number;
  /**
   * Target entropy for Mirostat. Controls coherence vs creativity balance.
   * Default: 5.0, Range: 0.0-10.0
   */
  mirostat_tau?: number;
  /**
   * Learning rate for Mirostat. How quickly it adapts.
   * Default: 0.1, Range: 0.001-1.0
   */
  mirostat_eta?: number;
  /**
   * Apply penalty to newline tokens to control formatting.
   * Default: true
   */
  penalize_newline?: boolean;
  /**
   * Array of strings that will stop generation when encountered.
   * Default: [], Example: ["\n", "User:", "###"]
   */
  stop?: string[];
  /**
   * Enable NUMA (Non-Uniform Memory Access) optimization.
   * Default: false (Linux systems may benefit from true)
   */
  numa?: boolean;
  /**
   * Context window size - total tokens for prompt + response.
   * Default: 2048, Range: 512-32768+ (model dependent, affects memory usage)
   */
  num_ctx?: number;
  /**
   * Batch size for prompt processing. Higher = faster but more memory.
   * Default: 512, Range: 1-2048
   */
  num_batch?: number;
  /**
   * Number of GPU layers to offload. -1 = auto, 0 = CPU only.
   * Default: -1, Range: -1 to model layer count
   */
  num_gpu?: number;
  /**
   * Primary GPU device ID for multi-GPU setups.
   * Default: 0, Range: 0 to (GPU count - 1)
   */
  main_gpu?: number;
  /**
   * Optimize for low VRAM usage at cost of speed.
   * Default: false
   */
  low_vram?: boolean;
  /**
   * Only load vocabulary, skip weights. For tokenization only.
   * Default: false
   */
  vocab_only?: boolean;
  /**
   * Use memory mapping for model files (faster loading).
   * Default: true
   */
  use_mmap?: boolean;
  /**
   * Lock model in memory to prevent swapping.
   * Default: false (enable for consistent performance)
   */
  use_mlock?: boolean;
  /**
   * Number of CPU threads for inference.
   * Default: auto-detected, Range: 1 to CPU core count
   */
  num_thread?: number;
}
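/**
 * Example (illustrative only): a conservative OllamaConfigOptions object for a reply
 * bot. The values are assumptions for demonstration, not tuned recommendations from
 * the Ollama docs; adjust them for your model and hardware.
 *
 * @example
 * const defaultOptions: OllamaConfigOptions = {
 *   temperature: 0.8,
 *   top_p: 0.9,
 *   repeat_penalty: 1.1,
 *   num_ctx: 4096,
 *   num_predict: 256,
 *   stop: ["###"],
 * };
 */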