import { z } from "zod";

/**
 * Schema for the identifiers and coordinates of one sequence element.
 *
 * - `sample_id`, `contig_id`, `cds_shorthand`: free-form strings.
 * - `elem_type`: either `"CDS"` or `"unknown"`.
 * - `strand`: either `"forward"` or `"reverse"`.
 * - `start`, `end`: positive integers. The schema does not check that
 *   `start <= end`.
 */
export const CDSDataSchema = z.object({
  sample_id: z.string(),
  contig_id: z.string(),
  elem_type: z.union([z.literal("CDS"), z.literal("unknown")]),
  cds_shorthand: z.string(),
  strand: z.enum(["forward", "reverse"]),
  start: z.number().int().positive(),
  end: z.number().int().positive(),
});
/** Static type inferred from {@link CDSDataSchema}. */
export type CDSData = z.infer<typeof CDSDataSchema>;

/**
 * Schema for a clip annotation: an id, a textual description and a numeric
 * score. The score is any number; no range is enforced here.
 */
export const ClipDataSchema = z.object({
  clipId: z.string(),
  clipDescription: z.string(),
  clipScore: z.number(),
});

/**
 * Schema for an operon prediction: a numeric operon id and a numeric
 * probability. Neither field is range-checked (e.g. `probability` is not
 * constrained to [0, 1] by this schema).
 */
export const OperonPredictionSchema = z.object({
  operonId: z.number(),
  probability: z.number(),
});
/** Static type inferred from {@link OperonPredictionSchema}. */
export type OperonPrediction = z.infer<typeof OperonPredictionSchema>;

/**
 * Schema for one protein record in the search output.
 *
 * Combines the record's own CDS data, the CDS data of its centroid, the
 * sequence string, and two annotations that may each be `null`: the operon
 * prediction and the clip annotation.
 */
export const ProteinRecordSchema = z.object({
  cdsData: CDSDataSchema,
  centroidCdsData: CDSDataSchema,
  sequence: z.string(),
  operonPrediction: OperonPredictionSchema.nullable(),
  clipAnnotation: ClipDataSchema.nullable(),
});
/** Static type inferred from {@link ProteinRecordSchema}. */
export type ProteinRecord = z.infer<typeof ProteinRecordSchema>;

/**
 * A point of the UMAP projection as it appears in the raw search response:
 * two numeric coordinates and a color string. The query and every match use
 * the same shape.
 */
const umapPointInputSchema = z.object({
  x: z.number(),
  y: z.number(),
  color: z.string(),
});

/**
 * Schema for the raw protein-search response, i.e. the structure that
 * `convertProteinSearchInputToOutput` takes as its `input`.
 *
 * - `status`: free-form status string.
 * - `metadata`: number of results returned and whether the result list was
 *   clamped.
 * - `data.debug`: boolean or `null`.
 * - `data.query.umapData`: UMAP point of the query.
 * - `data.matches`: one entry per match, each with a score, its UMAP point,
 *   its taxonomy, the list of proteins on its contig (`contig`), and
 *   `match_index`, the position of the matched protein inside `contig`.
 */
export const ProteinSearchResponseInputSchema = z.object({
  status: z.string(),
  metadata: z.object({
    resultsReturned: z.number().int(),
    resultsClamped: z.boolean(),
  }),
  data: z.object({
    debug: z.boolean().nullable(),
    query: z.object({
      umapData: umapPointInputSchema,
    }),
    matches: z.array(
      z.object({
        score: z.number(),
        // `match_index` is used as an array index into `contig`, so reject
        // fractional and negative values at validation time instead of
        // letting them surface later as an undefined lookup.
        match_index: z.number().int().nonnegative(),
        matchUmapData: umapPointInputSchema,
        matchTaxonomy: z.object({
          domain: z.string(),
          phylum: z.string(),
          class_: z.string(),
          order: z.string(),
          family: z.string(),
          genus: z.string(),
          species: z.string(),
        }),
        contig: z.array(
          z.object({
            cdsData: CDSDataSchema,
            sequence: z.string(),
            centroidCdsData: CDSDataSchema,
            centroidSequence: z.string(),
            operonPrediction: OperonPredictionSchema.nullable(),
            clipAnnotation: ClipDataSchema.nullable(),
          }),
        ),
      }),
    ),
  }),
});
/** Static type inferred from {@link ProteinSearchResponseInputSchema}. */
export type ProteinSearchResponseInput = z.infer<
  typeof ProteinSearchResponseInputSchema
>;

/**
 * Schema for one point of the UMAP plot in the search output.
 *
 * `cdsData` is either full CDS data or the literal string `"query"`, which
 * marks the point that represents the user's query. `cos_sim_color` is a
 * color string and `clipAnnotation` may be `null`.
 */
export const UMAPDatumSchema = z.object({
  cdsData: z.union([CDSDataSchema, z.literal("query")]), // if we are the query, we necessarily don't have CDS data
  x: z.number(),
  y: z.number(),
  cos_sim_color: z.string(),
  clipAnnotation: ClipDataSchema.nullable(),
});
/** Static type inferred from {@link UMAPDatumSchema}. */
export type UMAPDatum = z.infer<typeof UMAPDatumSchema>;

/**
 * Schema for a seven-rank taxonomy, from `domain` down to `species`. Every
 * rank is a plain string. The class rank is stored under the key `class_`.
 */
export const TaxonomySchema = z.object({
  domain: z.string(),
  phylum: z.string(),
  class_: z.string(),
  order: z.string(),
  family: z.string(),
  genus: z.string(),
  species: z.string(),
});
/** Static type inferred from {@link TaxonomySchema}. */
export type Taxonomy = z.infer<typeof TaxonomySchema>;

/**
 * Schema for the reshaped search result, i.e. the structure returned by
 * `convertProteinSearchInputToOutput`.
 *
 * - `query`: the UMAP point and the protein record of the query itself.
 * - `match_data`: one entry per match, holding its score, the matched protein
 *   record, the match's UMAP point, its taxonomy, and the protein records of
 *   its contig.
 */
export const ProteinSearchResponseOutputSchema = z.object({
  query: z.object({
    umap_datum: UMAPDatumSchema,
    record: ProteinRecordSchema,
  }),
  match_data: z.array(
    z.object({
      score: z.number(),
      match: ProteinRecordSchema,
      match_umap_datum: UMAPDatumSchema,
      taxonomy: TaxonomySchema,
      contig: z.array(ProteinRecordSchema),
    }),
  ),
});

/** Static type inferred from {@link ProteinSearchResponseOutputSchema}. */
export type ProteinSearchResponseOutput = z.infer<
  typeof ProteinSearchResponseOutputSchema
>;

/**
 * Schema for a protein-search request.
 *
 * Only `sequence` is required. `maxResults`, `contextBefore` and
 * `contextAfter` are optional numbers (not restricted to integers or to any
 * range by this schema), `mockEmbedding` is an optional boolean, and `debug`
 * falls back to `false` when omitted.
 */
export const ProteinSearchRequestSchema = z.object({
  sequence: z.string(),
  maxResults: z.number().optional(),
  mockEmbedding: z.boolean().optional(),
  contextBefore: z.number().optional(),
  contextAfter: z.number().optional(),
  debug: z.boolean().default(false),
});

// Query parameters always arrive as strings, so every field below starts from
// an optional string and converts it to the type the handler needs.

/**
 * Builds the schema for an optional numeric context-size query parameter.
 *
 * A missing or empty value becomes `undefined`. Any other value is converted
 * with `Number(...)` and must lie in the inclusive range 1..100; text that
 * is not a number converts to `NaN` and therefore fails the same range check.
 *
 * @param name Parameter name, used only in the validation message.
 */
const contextSizeQueryParam = (name: string) =>
  z
    .string()
    .optional()
    .transform((val) => (val ? Number(val) : undefined))
    .refine((val) => val === undefined || (val >= 1 && val <= 100), {
      message: `${name} must be between 1 and 100`,
    });

/**
 * Builds the schema for an optional boolean flag query parameter.
 *
 * The flag is `false` when the parameter is absent or when its value is
 * "false" (ignoring case and surrounding whitespace). Any other present
 * value, including an empty string, turns it on.
 */
const booleanFlagQueryParam = () =>
  z
    .string()
    .optional()
    .transform(
      (val) => val !== undefined && val.toLowerCase().trim() !== "false",
    );

/**
 * Schema for the query-string parameters of a protein search.
 *
 * `mockEmbedding` is parsed with the same rule as `debug`, so an explicit
 * `mockEmbedding=false` now disables the flag instead of enabling it.
 */
export const ProteinSearchQueryParams = z.object({
  contextBefore: contextSizeQueryParam("contextBefore"),
  contextAfter: contextSizeQueryParam("contextAfter"),
  debug: booleanFlagQueryParam(),
  mockEmbedding: booleanFlagQueryParam(),
});

/**
 * Label text "Unannotated". Nothing else in this file references it;
 * presumably importers use it for records whose `clipAnnotation` is `null`
 * (TODO confirm at the call sites).
 */
export const UNANNOTATED_CLIP_LABEL = "Unannotated";

/**
 * Reshapes a raw search response into the output structure.
 *
 * The query record is synthesized, because the response carries no CDS data
 * for the query: it gets placeholder ids, element type "unknown", the forward
 * strand and a span from 1 to `querySequence.length`. Note that an empty
 * `querySequence` therefore produces `end: 0`.
 *
 * For every match, the protein at position `match_index` of its `contig`
 * becomes the `match` record and supplies the CDS data and clip annotation of
 * the match's UMAP point; the remaining proteins are returned as `contig`.
 *
 * @param input Raw search response.
 * @param querySequence Sequence the user searched with.
 * @returns The query and one entry per match, in the order of `input.data.matches`.
 * @throws RangeError when a `match_index` does not address an entry of its
 *   `contig` (out of range, negative, or not an integer).
 */
export const convertProteinSearchInputToOutput = (
  input: ProteinSearchResponseInput,
  querySequence: string,
): ProteinSearchResponseOutput => {
  return {
    query: {
      record: {
        sequence: querySequence,
        cdsData: {
          sample_id: "user_query_sample_id",
          contig_id: "user_query_contig_id",
          elem_type: "unknown",
          cds_shorthand: "user_query_cds_shorthand",
          strand: "forward",
          start: 1,
          end: querySequence.length,
        },
        operonPrediction: null,
        centroidCdsData: {
          sample_id: "user_query_centroid_sample_id",
          contig_id: "user_query_centroid_contig_id",
          elem_type: "unknown",
          cds_shorthand: "user_query_centroid_cds_shorthand",
          strand: "forward",
          start: 1,
          end: querySequence.length,
        },
        clipAnnotation: null,
      },
      umap_datum: {
        x: input.data.query.umapData.x,
        y: input.data.query.umapData.y,
        cdsData: "query",
        cos_sim_color: input.data.query.umapData.color,
        clipAnnotation: null,
      },
    },
    match_data: input.data.matches.map((match) => {
      const matchProtein = match.contig[match.match_index];
      // Fail with a descriptive error instead of an opaque TypeError on the
      // first property access when the index does not hit a contig entry.
      if (matchProtein === undefined) {
        throw new RangeError(
          `match_index ${match.match_index} does not address any of the ${match.contig.length} protein(s) in its contig`,
        );
      }
      return {
        score: match.score,
        match_umap_datum: {
          x: match.matchUmapData.x,
          y: match.matchUmapData.y,
          cdsData: matchProtein.cdsData,
          cos_sim_color: match.matchUmapData.color,
          clipAnnotation: matchProtein.clipAnnotation,
        },
        taxonomy: match.matchTaxonomy,
        match: {
          cdsData: matchProtein.cdsData,
          centroidCdsData: matchProtein.centroidCdsData,
          clipAnnotation: matchProtein.clipAnnotation,
          sequence: matchProtein.sequence,
          // NOTE(review): `clipData` duplicates `clipAnnotation` and is not
          // declared by ProteinRecordSchema. Kept so the emitted payload is
          // unchanged; confirm whether any consumer still reads it.
          clipData: matchProtein.clipAnnotation,
          operonPrediction: matchProtein.operonPrediction,
        },
        contig: match.contig
          .filter((_, i) => i !== match.match_index)
          .map((protein) => ({
            cdsData: protein.cdsData,
            // NOTE(review): `centroidSequence` is not declared by
            // ProteinRecordSchema and is not copied onto `match` above.
            // Kept so the emitted payload is unchanged; confirm intent.
            centroidSequence: protein.centroidSequence,
            centroidCdsData: protein.centroidCdsData,
            sequence: protein.sequence,
            clipAnnotation: protein.clipAnnotation,
            operonPrediction: protein.operonPrediction,
          })),
      };
    }),
  };
};

/**
 * Formats a taxonomy as a single lineage string.
 *
 * Each rank is emitted as a prefixed segment (`d__`, `p__`, `c__`, `o__`,
 * `f__`, `g__`, `s__`) with its first character upper-cased, and the segments
 * are joined with "; ". The species segment is the genus followed by a space
 * and the species value, and the species value is capitalized like every
 * other rank.
 *
 * @param data Taxonomy to format.
 * @returns The lineage string, e.g. "d__Bacteria; ...; s__Genus Species".
 */
export const taxonomyToLinageString = (data: Taxonomy) => {
  const upperFirst = (text: string): string =>
    `${text.charAt(0).toUpperCase()}${text.slice(1)}`;

  // The genus appears twice: in its own segment and inside the species one.
  const genusName = upperFirst(data.genus);

  const segments: string[] = [
    `d__${upperFirst(data.domain)}`,
    `p__${upperFirst(data.phylum)}`,
    `c__${upperFirst(data.class_)}`, // the key is class_ because class is a keyword
    `o__${upperFirst(data.order)}`,
    `f__${upperFirst(data.family)}`,
    `g__${genusName}`,
    `s__${genusName} ${upperFirst(data.species)}`,
  ];

  return segments.join("; ");
};
/**
 * Renders CDS data as one pipe-separated string:
 * `sample_id|contig_id|elem_type|cds_shorthand|strand|start:end`.
 *
 * @param cdsData CDS data to render.
 * @returns The pipe-separated representation.
 */
export const cdsDataToString = (cdsData: CDSData): string => {
  const origin = `${cdsData.sample_id}|${cdsData.contig_id}`;
  const feature = `${cdsData.elem_type}|${cdsData.cds_shorthand}|${cdsData.strand}`;
  const span = `${cdsData.start}:${cdsData.end}`;

  return `${origin}|${feature}|${span}`;
};
