// pleroma-ollama-bot/types.d.ts

export interface Notification {
  account: Account;
  status: Status;
  id: string;
  type: string;
  created_at: string;
}
export interface ContextResponse {
  ancestors: ContextObject[];
  descendants: ContextObject[];
}
export interface PostAncestorsForModel {
  account_fqn: string;
  mentions: string[];
  plaintext_content: string;
}
interface ContextAccountObject {
  acct: string;
  avatar: string;
  bot: boolean;
  display_name: string;
  followers_count: number;
  following_count: number;
  fqn: string;
  id: string;
}
export interface ContextObject {
  content: string;
  id: string;
  in_reply_to_account_id: string | null;
  in_reply_to_id: string | null;
  media_attachments: string[];
  mentions: Mention[];
  pleroma: PleromaObjectInResponse;
  visibility: "public" | "private" | "unlisted";
  uri: string;
  account: ContextAccountObject;
}
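/*
 * Illustrative sketch (the instance URL, token, and mapping below are
 * assumptions, not part of this project): fetching a status's context and
 * flattening its ancestors into `PostAncestorsForModel` entries that can be
 * handed to the model as plain text.
 *
 *   const res = await fetch(`${instanceUrl}/api/v1/statuses/${statusId}/context`, {
 *     headers: { Authorization: `Bearer ${accessToken}` },
 *   });
 *   const context = (await res.json()) as ContextResponse;
 *   const ancestors: PostAncestorsForModel[] = context.ancestors.map((post) => ({
 *     account_fqn: post.account.fqn,
 *     mentions: post.mentions.map((m) => m.acct),
 *     plaintext_content: post.pleroma.content["text/plain"],
 *   }));
 */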
export interface NewStatusBody {
  content_type: "application/json" | "text/markdown";
  in_reply_to_id?: string;
  media_ids?: string[];
  sensitive?: "true" | "false" | boolean;
  status: string;
  to?: string[];
}
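/*
 * Illustrative sketch (assumptions: `instanceUrl`, `accessToken`, the `status`
 * being answered, and `generatedReply` are placeholders, not part of this
 * file): posting a reply shaped like `NewStatusBody` to the statuses endpoint.
 *
 *   const body: NewStatusBody = {
 *     content_type: "text/markdown",
 *     in_reply_to_id: status.id,
 *     status: `@${status.account.acct} ${generatedReply}`,
 *     sensitive: false,
 *   };
 *   await fetch(`${instanceUrl}/api/v1/statuses`, {
 *     method: "POST",
 *     headers: {
 *       Authorization: `Bearer ${accessToken}`,
 *       "Content-Type": "application/json",
 *     },
 *     body: JSON.stringify(body),
 *   });
 */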
export interface Account {
  acct: string; // nickname
  bot: boolean;
  display_name: string;
  fqn: string; // user@instance.tld
  id: string; // user ID
  note?: string; // bio
}
export interface OllamaRequest {
  /**
   * Name of the Ollama model to generate a response from. Must be a valid and locally installed model.
   */
  model: string;
  /**
   * The prompt sent from the end-user.
   */
  prompt: string;
  /**
   * Whatever system prompt you'd like to add to the model to make it more unique, or force it to respond a certain way.
   */
  system?: string;
  /**
   * Whether to stream responses from the API, or have them sent as one payload. Defaults to false.
   */
  stream?: boolean;
  /**
   * Ollama configuration options
   */
  options?: OllamaConfigOptions;
}
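/*
 * Illustrative sketch (the Ollama host and model name are assumptions): a
 * non-streaming call to Ollama's /api/generate endpoint using `OllamaRequest`
 * and `OllamaResponse` (declared further down).
 *
 *   const request: OllamaRequest = {
 *     model: "llama3.1",
 *     prompt: "Write a one-sentence greeting.",
 *     system: "You are a friendly fediverse bot.",
 *     stream: false,
 *   };
 *   const res = await fetch("http://localhost:11434/api/generate", {
 *     method: "POST",
 *     headers: { "Content-Type": "application/json" },
 *     body: JSON.stringify(request),
 *   });
 *   const reply = (await res.json()) as OllamaResponse;
 *   console.log(reply.response);
 */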
export interface OllamaChatRequest {
  model: string;
  messages: OllamaMessages[];
  stream?: boolean; // defaults to false
  options?: OllamaConfigOptions;
}
export interface OllamaChatResponse {
  model: string;
  created_at: string;
  message: OllamaChatResponseMessage;
  done_reason: string;
  done: boolean;
  total_duration: number;
  load_duration: number;
  prompt_eval_count: number;
  prompt_eval_duration: number;
  eval_count: number;
  eval_duration: number;
}
interface OllamaChatResponseMessage {
  role: "assistant";
  content: string;
}
interface OllamaMessages {
  role: "system" | "user";
  content: string;
}
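/*
 * Illustrative sketch (host and model name are assumptions): a non-streaming
 * call to Ollama's /api/chat endpoint using the chat types above.
 *
 *   const chatRequest: OllamaChatRequest = {
 *     model: "llama3.1",
 *     stream: false,
 *     messages: [
 *       { role: "system", content: "You are a friendly fediverse bot." },
 *       { role: "user", content: "Summarize this thread in one sentence." },
 *     ],
 *   };
 *   const res = await fetch("http://localhost:11434/api/chat", {
 *     method: "POST",
 *     headers: { "Content-Type": "application/json" },
 *     body: JSON.stringify(chatRequest),
 *   });
 *   const chatReply = (await res.json()) as OllamaChatResponse;
 *   console.log(chatReply.message.content);
 */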
export interface OllamaResponse {
  model: string;
  created_at: Date | string;
  response: string;
  done: boolean;
  done_reason: string;
}
export interface Status {
  account: Account;
  content: string; // content of the post
  created_at: string | Date; // when the post was created
  id: string; // ID of this status
  in_reply_to_account_id: string; // ID of the account being replied to
  in_reply_to_id: string; // ID of the status being replied to
  mentions: Mention[]; // array of mentions
  pleroma: PleromaObjectInResponse;
  visibility: "private" | "public" | "unlisted";
}
interface PleromaObjectInResponse {
  content: { "text/plain": string };
  context: string;
  conversation_id: number;
  direct_conversation_id: number | null;
  local: boolean;
  in_reply_to_account_acct: string;
}
export interface Mention {
  acct: string;
  id: string;
  url: string;
  username: string;
}
export interface PleromaEmoji {
  [emojiName: string]: PleromaEmojiMetadata;
}
interface PleromaEmojiMetadata {
  image_url: string;
  tags: string[];
}
interface ReactionRequest {
  name: string; // emoji name without colons
}
interface ReactionResponse {
  name: string;
  count: number;
  me: boolean;
  url?: string;
  static_url?: string;
}
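/*
 * Illustrative sketch (the endpoint paths follow Pleroma's documented emoji
 * APIs, but treat them and the helper names as assumptions for this project):
 * listing the instance's custom emoji and reacting to a status with one.
 *
 *   const emojiRes = await fetch(`${instanceUrl}/api/pleroma/emoji`);
 *   const emoji = (await emojiRes.json()) as PleromaEmoji;
 *   const name = Object.keys(emoji)[0]; // assumes at least one custom emoji
 *   await fetch(
 *     `${instanceUrl}/api/v1/pleroma/statuses/${statusId}/reactions/${name}`,
 *     { method: "PUT", headers: { Authorization: `Bearer ${accessToken}` } },
 *   );
 */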
/**
 * Experimental settings. I wouldn't recommend messing with these if you don't know how they work (I don't either).
 */
export interface OllamaConfigOptions {
  /**
   * Number of tokens guaranteed to be kept in memory during response generation.
   * Higher values leave less room for num_ctx. Used to preserve important context.
   * Default: 0, Range: 0-512
   */
  num_keep?: number;
  /**
   * Random seed for reproducible outputs. Same seed + same inputs = same output.
   * Default: -1 (random), Range: any integer
   */
  seed?: number;
  /**
   * Maximum number of tokens to generate in the response. Controls response length.
   * Default: 128, Range: 1-4096+ (model dependent)
   */
  num_predict?: number;
  /**
   * Limits token selection to top K most probable tokens. Reduces randomness.
   * Default: 40, Range: 1-100 (higher = more diverse)
   */
  top_k?: number;
  /**
   * Nucleus sampling - cumulative probability cutoff for token selection.
   * Default: 0.9, Range: 0.0-1.0 (lower = more focused)
   */
  top_p?: number;
  /**
   * Alternative to top_p - minimum probability threshold for tokens.
   * Default: 0.0, Range: 0.0-1.0 (higher = more selective)
   */
  min_p?: number;
  /**
   * Typical sampling - targets tokens with "typical" probability mass.
   * Default: 1.0 (disabled), Range: 0.0-1.0 (lower = less random)
   */
  typical_p?: number;
  /**
   * Number of previous tokens to consider for repetition penalty.
   * Default: 64, Range: 0-512
   */
  repeat_last_n?: number;
  /**
   * Randomness/creativity control. Lower = more deterministic, higher = more creative.
   * Default: 0.8, Range: 0.0-2.0 (sweet spot: 0.1-1.2)
   */
  temperature?: number;
  /**
   * Penalty for repeating tokens. Higher values reduce repetition.
   * Default: 1.1, Range: 0.0-2.0 (1.0 = no penalty)
   */
  repeat_penalty?: number;
  /**
   * Penalty for using tokens that have already appeared (OpenAI-style).
   * Default: 0.0, Range: -2.0 to 2.0
   */
  presence_penalty?: number;
  /**
   * Penalty proportional to token frequency in text (OpenAI-style).
   * Default: 0.0, Range: -2.0 to 2.0
   */
  frequency_penalty?: number;
  /**
   * Enables Mirostat sampling algorithm (0=disabled, 1=v1, 2=v2).
   * Default: 0, Range: 0, 1, or 2
   */
  mirostat?: number;
  /**
   * Target entropy for Mirostat. Controls coherence vs creativity balance.
   * Default: 5.0, Range: 0.0-10.0
   */
  mirostat_tau?: number;
  /**
   * Learning rate for Mirostat. How quickly it adapts.
   * Default: 0.1, Range: 0.001-1.0
   */
  mirostat_eta?: number;
  /**
   * Apply penalty to newline tokens to control formatting.
   * Default: true
   */
  penalize_newline?: boolean;
  /**
   * Array of strings that will stop generation when encountered.
   * Default: [], Example: ["\n", "User:", "###"]
   */
  stop?: string[];
  /**
   * Enable NUMA (Non-Uniform Memory Access) optimization.
   * Default: false (Linux systems may benefit from true)
   */
  numa?: boolean;
  /**
   * Context window size - total tokens for prompt + response.
   * Default: 2048, Range: 512-32768+ (model dependent, affects memory usage)
   */
  num_ctx?: number;
  /**
   * Batch size for prompt processing. Higher = faster but more memory.
   * Default: 512, Range: 1-2048
   */
  num_batch?: number;
  /**
   * Number of GPU layers to offload. -1 = auto, 0 = CPU only.
   * Default: -1, Range: -1 to model layer count
   */
  num_gpu?: number;
  /**
   * Primary GPU device ID for multi-GPU setups.
   * Default: 0, Range: 0 to (GPU count - 1)
   */
  main_gpu?: number;
  /**
   * Optimize for low VRAM usage at cost of speed.
   * Default: false
   */
  low_vram?: boolean;
  /**
   * Only load vocabulary, skip weights. For tokenization only.
   * Default: false
   */
  vocab_only?: boolean;
  /**
   * Use memory mapping for model files (faster loading).
   * Default: true
   */
  use_mmap?: boolean;
  /**
   * Lock model in memory to prevent swapping.
   * Default: false (enable for consistent performance)
   */
  use_mlock?: boolean;
  /**
   * Number of CPU threads for inference.
   * Default: auto-detected, Range: 1 to CPU core count
   */
  num_thread?: number;
}
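/*
 * Illustrative sketch (the specific values are assumptions, not recommended
 * settings): a conservative `OllamaConfigOptions` object passed along with a
 * request to keep replies short and fairly deterministic.
 *
 *   const options: OllamaConfigOptions = {
 *     temperature: 0.7,
 *     top_p: 0.9,
 *     num_ctx: 4096,
 *     num_predict: 256,
 *     repeat_penalty: 1.1,
 *     stop: ["###"],
 *   };
 *   const request: OllamaRequest = { model: "llama3.1", prompt: "Hello!", options };
 */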