import { z } from "zod";

/** Validator shared by `start` and `end`: an integer strictly greater than zero. */
const cdsPositionSchema = z.number().int().positive();

/**
 * Structured identifier for a CDS entry.
 *
 * Every field is required. `start` and `end` are validated independently of
 * each other; this schema does not check that `start <= end`.
 */
export const CDSIdSchema = z.object({
  // Plain strings; no format is enforced on either id.
  sample_id: z.string(),
  contig_id: z.string(),
  // Only the two literals "CDS" and "unknown" are accepted.
  elem_type: z.union([z.literal("CDS"), z.literal("unknown")]),
  cds_shorthand: z.string(),
  // Either "forward" or "reverse".
  strand: z.enum(["forward", "reverse"]),
  // Positive integers (zero, negatives and fractions are rejected).
  start: cdsPositionSchema,
  end: cdsPositionSchema,
});

/** Parsed value type produced by {@link CDSIdSchema}. */
export type CDSId = z.infer<typeof CDSIdSchema>;

/** Field validators for {@link ClipDataSchema}; all three fields are required. */
const clipDataFields = {
  clipId: z.string(),
  clipDescription: z.string(),
  // Any number is accepted; no range is enforced on the score.
  clipScore: z.number(),
};

/**
 * Annotation record consisting of a string id, a string description and a
 * numeric score.
 */
export const ClipDataSchema = z.object(clipDataFields);

/**
 * Field validators for {@link OperonPredictionSchema}.
 *
 * All three fields accept any number: no integer or range constraint is
 * applied, including on `probability`.
 */
const operonPredictionFields = {
  operonId: z.number(),
  frequencyRank: z.number(),
  probability: z.number(),
};

/** Operon prediction record: an id, a frequency rank and a probability. */
export const OperonPredictionSchema = z.object(operonPredictionFields);

/** Parsed value type produced by {@link OperonPredictionSchema}. */
export type OperonPrediction = z.infer<typeof OperonPredictionSchema>;

/**
 * Field validators for {@link ProteinRecordSchema}.
 *
 * `operonPrediction` and `clipAnnotation` must be present but may be `null`;
 * they are nullable, not optional.
 */
const proteinRecordFields = {
  // Both ids share the CDSIdSchema shape.
  cdsId: CDSIdSchema,
  centroidId: CDSIdSchema,
  sequence: z.string(),
  operonPrediction: OperonPredictionSchema.nullable(),
  clipAnnotation: ClipDataSchema.nullable(),
};

/**
 * A protein record: its own CDS id, a centroid CDS id, the sequence string
 * and two nullable annotation objects.
 */
export const ProteinRecordSchema = z.object(proteinRecordFields);

/** Parsed value type produced by {@link ProteinRecordSchema}. */
export type ProteinRecord = z.infer<typeof ProteinRecordSchema>;

/**
 * UMAP point shape used by the input schema, both for the query
 * (`data.query.umapData`) and for every match (`matchUmapData`).
 */
const inputUmapPointSchema = z.object({
  x: z.number(),
  y: z.number(),
  cos_sim_score: z.number(),
  color: z.string(),
});

/** Taxonomy object attached to each match; note the `class_` key spelling. */
const inputMatchTaxonomySchema = z.object({
  domain: z.string(),
  phylum: z.string(),
  class_: z.string(),
  order: z.string(),
  family: z.string(),
  genus: z.string(),
  species: z.string(),
});

/**
 * One element of a match's `contig` array. Unlike ProteinRecordSchema it also
 * carries `centroidSequence`.
 */
const inputContigEntrySchema = z.object({
  cdsId: CDSIdSchema,
  sequence: z.string(),
  centroidId: CDSIdSchema,
  centroidSequence: z.string(),
  // Required keys whose value may be null.
  operonPrediction: OperonPredictionSchema.nullable(),
  clipAnnotation: ClipDataSchema.nullable(),
});

/** One element of `data.matches`. */
const inputMatchSchema = z.object({
  score: z.number(),
  match_index: z.number(),
  matchUmapData: inputUmapPointSchema,
  matchTaxonomy: inputMatchTaxonomySchema,
  contig: z.array(inputContigEntrySchema),
});

/**
 * Protein-search response in its "input" shape: a status string, result
 * metadata, and a `data` object holding the query's UMAP point and the list
 * of matches.
 *
 * NOTE(review): presumably this is the raw payload that is later reshaped
 * into ProteinSearchResponseOutputSchema; confirm against the caller.
 */
export const ProteinSearchResponseInputSchema = z.object({
  status: z.string(),
  metadata: z.object({
    resultsReturned: z.number().int(),
    resultsClamped: z.boolean(),
  }),
  data: z.object({
    // Must be present; may be null.
    debug: z.boolean().nullable(),
    query: z.object({
      umapData: inputUmapPointSchema,
    }),
    matches: z.array(inputMatchSchema),
  }),
});

/** Parsed value type produced by {@link ProteinSearchResponseInputSchema}. */
export type ProteinSearchResponseInput = z.infer<
  typeof ProteinSearchResponseInputSchema
>;

/**
 * `cdsId` of a UMAP datum: either a full CDS id object or the literal string
 * "query". The query point has no CDS data, so it is tagged with the literal
 * instead.
 */
const umapDatumCdsIdSchema = z.union([CDSIdSchema, z.literal("query")]);

/**
 * A single point for the UMAP plot: who it belongs to, its x/y position, its
 * cosine-similarity score with the matching color string, and a nullable
 * annotation.
 */
export const UMAPDatumSchema = z.object({
  cdsId: umapDatumCdsIdSchema,
  x: z.number(),
  y: z.number(),
  cos_sim_score: z.number(),
  cos_sim_color: z.string(),
  // Required key; the value may be null.
  clipAnnotation: ClipDataSchema.nullable(),
});

/** Parsed value type produced by {@link UMAPDatumSchema}. */
export type UMAPDatum = z.infer<typeof UMAPDatumSchema>;

/**
 * Field validators for {@link TaxonomySchema}: seven required strings, listed
 * from `domain` down to `species`. Note the trailing underscore in `class_`.
 */
const taxonomyFields = {
  domain: z.string(),
  phylum: z.string(),
  class_: z.string(),
  order: z.string(),
  family: z.string(),
  genus: z.string(),
  species: z.string(),
};

/** Taxonomic lineage expressed as one string per rank. */
export const TaxonomySchema = z.object(taxonomyFields);

/** Parsed value type produced by {@link TaxonomySchema}. */
export type Taxonomy = z.infer<typeof TaxonomySchema>;

/** The `query` part of the output: the query's UMAP datum plus its record. */
const outputQuerySchema = z.object({
  umap_datum: UMAPDatumSchema,
  record: ProteinRecordSchema,
});

/**
 * One element of `searchResults`: the match score, the matched record, its
 * UMAP datum, its taxonomy, and the records of its `contig` array.
 */
const outputSearchResultSchema = z.object({
  score: z.number(),
  match: ProteinRecordSchema,
  match_umap_datum: UMAPDatumSchema,
  taxonomy: TaxonomySchema,
  contig: z.array(ProteinRecordSchema),
});

/**
 * Protein-search response in its "output" shape: the query entry and an array
 * of search results.
 */
export const ProteinSearchResponseOutputSchema = z.object({
  query: outputQuerySchema,
  searchResults: z.array(outputSearchResultSchema),
});

/** Parsed value type produced by {@link ProteinSearchResponseOutputSchema}. */
export type ProteinSearchResponseOutput = z.infer<
  typeof ProteinSearchResponseOutputSchema
>;

/**
 * Field validators for {@link ProteinSearchRequestSchema}.
 *
 * Only `sequence` is required. Every other field falls back to its default
 * when the key is missing or `undefined`. The numeric fields accept any
 * number; no integer or range check is applied here.
 */
const proteinSearchRequestFields = {
  sequence: z.string(),
  // Defaults to 100.
  maxResults: z.number().default(100),
  // Defaults to false.
  mockEmbedding: z.boolean().default(false),
  // Both context sizes default to 10.
  contextBefore: z.number().default(10),
  contextAfter: z.number().default(10),
  // Defaults to false.
  debug: z.boolean().default(false),
};

/** Body of a protein-search request. */
export const ProteinSearchRequestSchema = z.object(proteinSearchRequestFields);

// Query parameters always arrive as strings, so every field below starts from
// an optional string and is converted to its runtime type by a transform.

/** Inclusive lower bound accepted for `contextBefore` / `contextAfter`. */
const CONTEXT_PARAM_MIN = 1;
/** Inclusive upper bound accepted for `contextBefore` / `contextAfter`. */
const CONTEXT_PARAM_MAX = 100;

/**
 * Builds the validator for a numeric context-size query parameter.
 *
 * A missing or empty value parses to `undefined`. Any other value is
 * converted with `Number()` and must lie within the inclusive bounds above.
 * Non-numeric text converts to `NaN`, which fails the same range check.
 *
 * @param name - Parameter name; used only to build the validation message.
 * @returns A zod schema whose output is `number | undefined`.
 */
const contextSizeParam = (name: "contextBefore" | "contextAfter") =>
  z
    .string()
    .optional()
    .transform((val) => (val ? Number(val) : undefined))
    .refine(
      (val) =>
        val === undefined ||
        (val >= CONTEXT_PARAM_MIN && val <= CONTEXT_PARAM_MAX),
      {
        message: `${name} must be between ${CONTEXT_PARAM_MIN} and ${CONTEXT_PARAM_MAX}`,
      },
    );

/**
 * Interprets a presence-style boolean query flag.
 *
 * - parameter absent                                -> `false`
 * - value "false" (any case, surrounding whitespace) -> `false`
 * - anything else, including an empty value          -> `true`
 *
 * @param val - Raw query-string value, or `undefined` when absent.
 * @returns The boolean the flag stands for.
 */
const parseFlagParam = (val: string | undefined): boolean =>
  val !== undefined && val.trim().toLowerCase() !== "false";

/** Builds the validator for a boolean flag parsed by `parseFlagParam`. */
const booleanFlagParam = () =>
  z
    .string()
    .optional()
    .transform((val) => parseFlagParam(val));

/**
 * Query-string form of the protein-search options.
 *
 * `contextBefore` / `contextAfter` parse to `number | undefined`; `debug` and
 * `mockEmbedding` parse to `boolean`.
 *
 * Both flags use the same rule. Previously `mockEmbedding` was `true` for any
 * present value, so `?mockEmbedding=false` enabled mocking while
 * `?debug=false` disabled debug.
 */
export const ProteinSearchQueryParams = z.object({
  contextBefore: contextSizeParam("contextBefore"),
  contextAfter: contextSizeParam("contextAfter"),
  debug: booleanFlagParam(),
  mockEmbedding: booleanFlagParam(),
});

/**
 * The label string "Unannotated".
 *
 * NOTE(review): presumably shown for records whose `clipAnnotation` is null;
 * the usage is not visible here, so confirm against the call sites.
 */
export const UNANNOTATED_CLIP_LABEL = "Unannotated";
