diff --git a/README.md b/README.md index 33ecdd9..b0ac633 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,36 @@ -# llm-proxy-go +# LLM Proxy +HTTP proxy for LLM APIs with streaming support and chunk processing. + +## Usage + +```bash +./llm-proxy +``` + +## Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `UPSTREAM_URL` | Upstream LLM API URL | `https://api.openai.com/v1/chat/completions` | +| `LISTEN_ADDR` | Listen address | `:8080` | +| `API_KEY` | Upstream API key | - | +| `INSECURE` | Skip TLS verification | `false` | + +## Example + +```bash +UPSTREAM_URL=https://api.openai.com/v1/chat/completions \ +API_KEY=sk-... \ +LISTEN_ADDR=:8080 \ +./llm-proxy +``` + +## Endpoints + +- `GET /health` - Health check +- `/*` - Proxies all requests to upstream + +## Streaming + +Supports SSE (`text/event-stream`) and NDJSON (`application/x-ndjson`) streaming. Each chunk is processed via `processChunk()` before forwarding. \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9e493bf --- /dev/null +++ b/go.mod @@ -0,0 +1,7 @@ +module hatter.me/tools/llm-proxy + +go 1.24.3 + +require github.com/joho/godotenv v1.5.1 + +require github.com/pelletier/go-toml/v2 v2.3.0 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..c98ed14 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM= +github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= diff --git a/justfile b/justfile new file mode 100644 index 0000000..e35e16a --- /dev/null +++ b/justfile @@ -0,0 +1,11 @@ +_: + @just --list + + +alias b:=build + +# Build llm-proxy +build: + go build + + diff --git a/llm-proxy.toml b/llm-proxy.toml new file mode 100644 index 0000000..80a2540 --- /dev/null +++ b/llm-proxy.toml @@ -0,0 +1,13 @@ +# LLM Proxy Configuration + +# Upstream LLM API URL +upstream_url = "https://api.openai.com/v1/chat/completions" + +# Listen address +listen_addr = ":8080" + +# Upstream API key +api_key = "" + +# Skip TLS verification +insecure = false \ No newline at end of file diff --git a/main.go b/main.go new file mode 100644 index 0000000..fbd1d73 --- /dev/null +++ b/main.go @@ -0,0 +1,247 @@ +package main + +import ( + "bufio" + "context" + "crypto/tls" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/signal" + "strings" + "syscall" + "time" + + "github.com/joho/godotenv" + "github.com/pelletier/go-toml/v2" +) + +type Config struct { + UpstreamURL string + ListenAddr string + APIKey string + Insecure bool +} + +func main() { + if len(os.Args) > 1 && (os.Args[1] == "-h" || os.Args[1] == "--help") { + printHelp() + os.Exit(0) + } + + godotenv.Load() + + cfg := loadConfig() + + printConfig(cfg) + + if cfg.APIKey == "" { + fmt.Fprintln(os.Stderr, "error: API_KEY is required") + os.Exit(1) + } + + if cfg.Insecure { + fmt.Fprintln(os.Stderr, "WARNING: TLS verification disabled") + } + + mux := http.NewServeMux() + mux.HandleFunc("/", handleProxy(cfg)) + + srv := &http.Server{ + Addr: cfg.ListenAddr, + Handler: mux, + } + + go func() { + fmt.Printf("LLM Proxy listening on %s\n", cfg.ListenAddr) + fmt.Printf("Upstream: %s\n", cfg.UpstreamURL) + srv.ListenAndServe() + }() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + <-sigCh + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + srv.Shutdown(ctx) +} + +func handleProxy(cfg Config) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/health" { + w.WriteHeader(http.StatusOK) + return + } + + proxyReq := cloneRequest(r, cfg.UpstreamURL) + + if cfg.APIKey != "" { + proxyReq.Header.Set("Authorization", "Bearer "+cfg.APIKey) + } + + tr := &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: cfg.Insecure}, + } + client := &http.Client{Transport: tr} + + resp, err := client.Do(proxyReq) + if err != nil { + http.Error(w, fmt.Sprintf("upstream error: %v", err), http.StatusBadGateway) + return + } + defer resp.Body.Close() + + for k, v := range resp.Header { + w.Header()[k] = v + } + w.WriteHeader(resp.StatusCode) + + if !isStreaming(resp) { + io.Copy(w, resp.Body) + return + } + + handleStream(w, resp.Body, cfg) + } +} + +func cloneRequest(r *http.Request, upstreamURL string) *http.Request { + upstream, _ := url.Parse(upstreamURL) + + proxyReq := r.Clone(context.Background()) + proxyReq.URL.Scheme = upstream.Scheme + proxyReq.URL.Host = upstream.Host + proxyReq.URL.Path = strings.ReplaceAll(r.URL.Path, upstream.Path, "") + if upstream.Path != "" && !strings.HasSuffix(proxyReq.URL.Path, "/") { + proxyReq.URL.Path = upstream.Path + proxyReq.URL.Path + } + proxyReq.Host = upstream.Host + + if val := r.Header.Get("Content-Type"); val != "" { + proxyReq.Header.Set("Content-Type", val) + } + + proxyReq.Header.Del("Host") + proxyReq.Header.Del("Authorization") + + return proxyReq +} + +func isStreaming(resp *http.Response) bool { + ct := strings.ToLower(resp.Header.Get("Content-Type")) + return strings.Contains(ct, "text/event-stream") || + strings.Contains(ct, "application/x-ndjson") || + strings.Contains(ct, "stream") +} + +func handleStream(w io.Writer, body io.Reader, cfg Config) { + reader := bufio.NewReader(body) + + for { + line, err := reader.ReadString('\n') + if err != nil { + if err != io.EOF { + fmt.Fprintf(os.Stderr, "stream error: %v\n", err) + } + break + } + + line = strings.TrimRight(line, "\r\n") + if line == "" { + continue + } + + processChunk(w, line, cfg) + } +} + +func processChunk(w io.Writer, line string, cfg Config) { + if strings.HasPrefix(line, "data: ") { + data := strings.TrimPrefix(line, "data: ") + if data == "[DONE]" { + fmt.Fprintln(w, line) + return + } + } + + fmt.Fprintln(w, line) +} + +func getEnv(key, def string) string { + if val := os.Getenv(key); val != "" { + return val + } + return def +} + +func printHelp() { + fmt.Println(`LLM Proxy - HTTP proxy for LLM APIs + +Usage: + llm-proxy Start the proxy + llm-proxy -h Show this help + +Config: + Config file (optional): llm-proxy.toml + Environment variables take priority over config file. + +Environment Variables: + UPSTREAM_URL Upstream LLM API URL (default: https://api.openai.com/v1/chat/completions) + LISTEN_ADDR Listen address (default: :8080) + API_KEY Upstream API key (required) + INSECURE Skip TLS verification (default: false)`) +} + +func printConfig(cfg Config) { + masked := cfg.APIKey + if len(masked) > 4 { + masked = "****" + masked[len(masked)-4:] + } else { + masked = "****" + } + fmt.Printf("Upstream URL: %s\n", cfg.UpstreamURL) + fmt.Printf("Listen Addr: %s\n", cfg.ListenAddr) + fmt.Printf("API Key: %s\n", masked) + fmt.Printf("Insecure: %v\n", cfg.Insecure) +} + +func loadConfig() Config { + cfg := Config{ + UpstreamURL: "https://api.openai.com/v1/chat/completions", + ListenAddr: ":8080", + } + + if data, err := os.ReadFile("llm-proxy.toml"); err == nil { + var tomlCfg struct { + UpstreamURL string `toml:"upstream_url"` + ListenAddr string `toml:"listen_addr"` + APIKey string `toml:"api_key"` + Insecure bool `toml:"insecure"` + } + if err := toml.Unmarshal(data, &tomlCfg); err == nil { + cfg.UpstreamURL = tomlCfg.UpstreamURL + cfg.ListenAddr = tomlCfg.ListenAddr + cfg.APIKey = tomlCfg.APIKey + cfg.Insecure = tomlCfg.Insecure + fmt.Println("Loaded config from llm-proxy.toml") + } + } + + if val := os.Getenv("UPSTREAM_URL"); val != "" { + cfg.UpstreamURL = val + } + if val := os.Getenv("LISTEN_ADDR"); val != "" { + cfg.ListenAddr = val + } + if val := os.Getenv("API_KEY"); val != "" { + cfg.APIKey = val + } + if val := os.Getenv("INSECURE"); val != "" { + cfg.Insecure = val == "true" + } + + return cfg +} \ No newline at end of file