// Package media contains metadata scrubbing and re-compression helpers for
// files uploaded to the social feed.
//
// Why this exists
// ---------------
// Every image file carries an EXIF block that can leak:
//   - GPS coordinates where the photo was taken
//   - Camera model, serial number, lens
//   - Original timestamp (even if the user clears their clock)
//   - Software name / version
//   - Author / copyright fields
//   - A small embedded thumbnail that may leak even after cropping
//
// Videos and audio have analogous containers (MOV/MP4 atoms, ID3 tags,
// Matroska tags). For a social feed that prides itself on privacy we
// can't trust the client to have stripped all of it — we scrub again
// on the server before persisting the file to the feed mailbox.
//
// Strategy
// --------
// Images: decode → re-encode with the stdlib JPEG encoder. That encoder
// does not emit EXIF, ICC or XMP segments, so re-encoding is a complete
// scrub by construction. Output is always JPEG (quality ImageJPEGQuality),
// whatever the input format was.
//
// Videos and audio: require an external ffmpeg worker (the "media
// sidecar") — we cannot do this in pure Go without a huge CGo footprint.
// A tiny HTTP contract (see docs/media-sidecar.md) lets node operators
// plug in compressO-like services. If no sidecar is configured this
// package returns ErrSidecarUnavailable; whether to store the file as-is
// (with a log warning) or to reject the upload is the caller's decision.
//
// Magic-byte detection: the claimed Content-Type must match what's
// actually in the bytes; mismatches are rejected (prevents a PDF
// labelled as image/jpeg from bypassing the scrubber).
package media

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"image"
	"image/jpeg"
	"image/png"
	"io"
	"net/http"
	"strings"
	"time"

	// Register decoders for the formats we accept.
	_ "image/gif"
	_ "golang.org/x/image/webp"
)

// Errors returned by scrubber.
var (
	// ErrUnsupportedMIME is returned when the detected type of the bytes
	// is one we don't know how to scrub.
	ErrUnsupportedMIME = errors.New("unsupported media type")
	// ErrMIMEMismatch is returned when the bytes don't match the claimed
	// MIME — blocks a crafted upload from bypassing the scrubber.
	ErrMIMEMismatch = errors.New("actual bytes don't match claimed content-type")
	// ErrSidecarUnavailable is returned when video or audio scrubbing is
	// requested but no sidecar URL is configured. Whether unscrubbed
	// storage is acceptable in that case is a policy decision left to
	// the caller; this package never stores anything itself.
	ErrSidecarUnavailable = errors.New("media sidecar required for video scrubbing but not configured")
)

// ── Image scrubbing ────────────────────────────────────────────────────────

// ImageMaxDim caps the larger dimension of a stored image. 1080px is the
// "full-HD-ish" sweet spot — larger rarely matters on a phone feed and
// drops file size dramatically. The client is expected to have downscaled
// already (expo-image-manipulator), but we re-apply the cap server-side
// as a defence-in-depth and to guarantee uniform storage cost.
const ImageMaxDim = 1080

// ImageJPEGQuality is the re-encode quality for JPEG output. 75 balances
// perceived quality with size — below 60 artifacts become visible, above
// 85 we're paying for noise we can't see.
const ImageJPEGQuality = 75

// ScrubImage decodes src, removes all metadata (by way of re-encoding
// with the stdlib JPEG encoder), downscales when either dimension exceeds
// ImageMaxDim, and returns the clean JPEG bytes plus the canonical MIME
// the caller should store.
//
// claimedMIME is what the client said the file is; if the bytes don't
// match, ErrMIMEMismatch is returned. An empty claimedMIME skips that
// comparison. Bytes that are not a supported image type yield
// ErrUnsupportedMIME. Accepts image/jpeg, image/png, image/gif,
// image/webp on input; output is always image/jpeg (one less branch in
// the reader, and no decoder has to touch EXIF).
func ScrubImage(src []byte, claimedMIME string) (out []byte, outMIME string, err error) { actualMIME := detectMIME(src) if !isImageMIME(actualMIME) { return nil, "", fmt.Errorf("%w: %s", ErrUnsupportedMIME, actualMIME) } if claimedMIME != "" && !mimesCompatible(claimedMIME, actualMIME) { return nil, "", fmt.Errorf("%w: claimed %s, actual %s", ErrMIMEMismatch, claimedMIME, actualMIME) } img, _, err := image.Decode(bytes.NewReader(src)) if err != nil { return nil, "", fmt.Errorf("decode image: %w", err) } // Downscale if needed. We use a draw-based nearest-neighbour style // approach via stdlib to avoid pulling in x/image/draw unless we need // higher-quality resampling. For feed thumbnails nearest is fine since // content is typically downsampled already. if bounds := img.Bounds(); bounds.Dx() > ImageMaxDim || bounds.Dy() > ImageMaxDim { img = downscale(img, ImageMaxDim) } // Re-encode as JPEG. stdlib's jpeg.Encode writes ZERO metadata — // no EXIF, no ICC, no XMP, no MakerNote. That's the scrub. var buf bytes.Buffer if err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: ImageJPEGQuality}); err != nil { return nil, "", fmt.Errorf("encode jpeg: %w", err) } return buf.Bytes(), "image/jpeg", nil } // downscale returns a new image whose larger dimension equals maxDim, // preserving aspect ratio. Uses stdlib image.NewRGBA + a nearest-neighbour // copy loop — good enough for feed images that are already compressed. func downscale(src image.Image, maxDim int) image.Image { b := src.Bounds() w, h := b.Dx(), b.Dy() var nw, nh int if w >= h { nw = maxDim nh = h * maxDim / w } else { nh = maxDim nw = w * maxDim / h } dst := image.NewRGBA(image.Rect(0, 0, nw, nh)) for y := 0; y < nh; y++ { sy := b.Min.Y + y*h/nh for x := 0; x < nw; x++ { sx := b.Min.X + x*w/nw dst.Set(x, y, src.At(sx, sy)) } } return dst } // pngEncoder is kept for callers that explicitly want lossless output // (future — not used by ScrubImage which always produces JPEG). 
var pngEncoder = png.Encoder{CompressionLevel: png.BestCompression}

// ── MIME detection & validation ────────────────────────────────────────────

// detectMIME inspects magic bytes to figure out what the data actually is,
// independent of what the caller claimed. It returns "" for empty input.
//
// The heavy lifting is done by http.DetectContentType; on top of that we
//   - look at the ISO-BMFF "ftyp" major brand, because the stdlib sniffer
//     only recognises "mp4*" brands and so never reports QuickTime or M4A;
//   - rename the stdlib labels "audio/wave" and "application/ogg" to the
//     names isAudioMIME uses ("audio/wav", "audio/ogg").
//
// Ogg is a container that can also carry video; it is treated as audio
// here because that is the only Ogg type this package accepts.
func detectMIME(data []byte) string {
	if len(data) == 0 {
		return ""
	}

	// ISO-BMFF layout: [box size:4]["ftyp":4][major brand:4]...
	if len(data) >= 12 && string(data[4:8]) == "ftyp" {
		switch string(data[8:12]) {
		case "qt  ":
			return "video/quicktime"
		case "M4A ":
			return "audio/mp4"
		}
	}

	sniffed := strings.SplitN(http.DetectContentType(data), ";", 2)[0]
	switch sniffed {
	case "audio/wave":
		return "audio/wav"
	case "application/ogg":
		return "audio/ogg"
	}
	return sniffed
}

// isImageMIME reports whether m is an image type ScrubImage can decode.
func isImageMIME(m string) bool {
	switch m {
	case "image/jpeg", "image/png", "image/gif", "image/webp":
		return true
	}
	return false
}

// isVideoMIME reports whether m is a video type handled by the sidecar.
func isVideoMIME(m string) bool {
	switch m {
	case "video/mp4", "video/webm", "video/quicktime":
		return true
	}
	return false
}

// isAudioMIME reports whether m is an audio type handled by the sidecar.
// detectMIME cannot tell audio-only WebM from video WebM, so "audio/webm"
// is listed for completeness but is never produced by detection.
func isAudioMIME(m string) bool {
	switch m {
	case "audio/mpeg", "audio/ogg", "audio/webm", "audio/wav", "audio/mp4":
		return true
	}
	return false
}

// mimesCompatible reports whether the client's claimed MIME agrees with
// the type detected from magic bytes. The claim is normalised first:
// parameters ("; codecs=opus"), surrounding whitespace and letter case are
// ignored, and a few well-known aliases (image/jpg vs image/jpeg, etc.)
// are mapped to their canonical name so a sloppy client header doesn't
// cause a 400. actual is expected to come from detectMIME.
func mimesCompatible(claimed, actual string) bool {
	if i := strings.IndexByte(claimed, ';'); i >= 0 {
		claimed = claimed[:i]
	}
	claimed = strings.ToLower(strings.TrimSpace(claimed))
	if claimed == actual {
		return true
	}

	var canon string
	switch claimed {
	case "image/jpg":
		canon = "image/jpeg"
	case "image/x-png":
		canon = "image/png"
	case "video/mov":
		canon = "video/quicktime"
	case "audio/wave", "audio/x-wav":
		canon = "audio/wav"
	case "application/ogg":
		canon = "audio/ogg"
	case "audio/mp3":
		canon = "audio/mpeg"
	default:
		return false
	}
	return canon == actual
}

// ── Video scrubbing (sidecar) ──────────────────────────────────────────────

// SidecarConfig describes how to reach an external media scrubber worker
// (typically a tiny FFmpeg-wrapper HTTP service running alongside the
// node — see docs/media-sidecar.md).
// Leaving URL empty disables sidecar use; callers then decide whether to
// fall back to "store as-is and warn" or to reject video uploads entirely.
type SidecarConfig struct {
	// URL is the base URL of the sidecar. Expected routes:
	//
	//   POST /scrub/video   body: raw bytes → returns scrubbed bytes
	//   POST /scrub/audio   body: raw bytes → returns scrubbed bytes
	//
	// Both MUST strip metadata (-map_metadata -1 in ffmpeg terms) and
	// re-encode with a sane bitrate cap (default: H.264 CRF 28 for
	// video, libopus 96k for audio). See the reference implementation
	// at docker/media-sidecar/ in this repo.
	URL string

	// Timeout guards against a hung sidecar. 30s is enough for a 5 MB
	// video on modest hardware; larger inputs should be pre-compressed
	// by the client. Zero or negative selects the 30s default.
	Timeout time.Duration

	// MaxInputBytes caps what we forward to the sidecar (protects
	// against an attacker tying up the sidecar on a 1 GB upload).
	// Zero or negative selects the 16 MiB default.
	MaxInputBytes int64
}

// Scrubber bundles image + sidecar capabilities. Create once at node
// startup and reuse.
type Scrubber struct {
	sidecar SidecarConfig // effective config, defaults already applied
	http    *http.Client  // client used for all sidecar calls
}

// NewScrubber returns a Scrubber. sidecar.URL may be empty (image-only
// mode) — in that case Scrub returns ErrSidecarUnavailable for video and
// audio input.
//
// Non-positive Timeout and MaxInputBytes are replaced by their defaults
// (30s and 16 MiB). A negative timeout would otherwise make every sidecar
// call fail immediately, and a negative size cap would reject every input.
func NewScrubber(sidecar SidecarConfig) *Scrubber {
	const (
		defaultTimeout       = 30 * time.Second
		defaultMaxInputBytes = 16 * 1024 * 1024 // 16 MiB input → client should have shrunk
	)
	if sidecar.Timeout <= 0 {
		sidecar.Timeout = defaultTimeout
	}
	if sidecar.MaxInputBytes <= 0 {
		sidecar.MaxInputBytes = defaultMaxInputBytes
	}
	return &Scrubber{
		sidecar: sidecar,
		http:    &http.Client{Timeout: sidecar.Timeout},
	}
}

// Scrub picks the right strategy based on the actual MIME of the bytes.
// Returns the cleaned payload and the canonical MIME to store under.
//
// Images are re-encoded in-process; video and audio are sent to the
// sidecar. ErrMIMEMismatch is returned when a non-empty claimedMIME
// disagrees with the detected type, ErrUnsupportedMIME when the detected
// type is not one we handle.
func (s *Scrubber) Scrub(ctx context.Context, src []byte, claimedMIME string) ([]byte, string, error) { actual := detectMIME(src) if claimedMIME != "" && !mimesCompatible(claimedMIME, actual) { return nil, "", fmt.Errorf("%w: claimed %s, actual %s", ErrMIMEMismatch, claimedMIME, actual) } switch { case isImageMIME(actual): // Images handled in-process, no sidecar needed. return ScrubImage(src, claimedMIME) case isVideoMIME(actual): return s.scrubViaSidecar(ctx, "/scrub/video", src, actual) case isAudioMIME(actual): return s.scrubViaSidecar(ctx, "/scrub/audio", src, actual) default: return nil, "", fmt.Errorf("%w: %s", ErrUnsupportedMIME, actual) } } // scrubViaSidecar POSTs src to the configured sidecar route and returns // the response bytes. Errors: // - ErrSidecarUnavailable if sidecar.URL is empty // - wrapping the HTTP error otherwise func (s *Scrubber) scrubViaSidecar(ctx context.Context, path string, src []byte, actual string) ([]byte, string, error) { if s.sidecar.URL == "" { return nil, "", ErrSidecarUnavailable } if int64(len(src)) > s.sidecar.MaxInputBytes { return nil, "", fmt.Errorf("input exceeds sidecar max %d bytes", s.sidecar.MaxInputBytes) } req, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimRight(s.sidecar.URL, "/")+path, bytes.NewReader(src)) if err != nil { return nil, "", fmt.Errorf("build sidecar request: %w", err) } req.Header.Set("Content-Type", actual) resp, err := s.http.Do(req) if err != nil { return nil, "", fmt.Errorf("call sidecar: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) return nil, "", fmt.Errorf("sidecar returned %d: %s", resp.StatusCode, string(body)) } // Limit the reply we buffer — an evil sidecar could try to amplify. 
const maxReply = 64 * 1024 * 1024 // 64 MiB hard cap out, err := io.ReadAll(io.LimitReader(resp.Body, maxReply)) if err != nil { return nil, "", fmt.Errorf("read sidecar reply: %w", err) } respMIME := resp.Header.Get("Content-Type") if respMIME == "" { respMIME = actual } return out, strings.SplitN(respMIME, ";", 2)[0], nil } // IsSidecarConfigured reports whether video/audio scrubbing is available. // Callers can use this to decide whether to accept video attachments or // reject them with a clear "this node doesn't support video" message. func (s *Scrubber) IsSidecarConfigured() bool { return s.sidecar.URL != "" }