Files
amqtrain/src/lib/db/import-amq.ts

506 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { eq, sql, type SQL } from "drizzle-orm";
import { z } from "zod";
import {
AmqAnimeSchema,
AmqArtistSchema,
AmqGroupSchema,
AmqSongSchema,
} from "$lib/types/amq";
import amqAnimeJson from "../../../static/data/amq-anime.json" with {
type: "json",
};
import amqArtistsJson from "../../../static/data/amq-artists.json" with {
type: "json",
};
import amqGroupsJson from "../../../static/data/amq-groups.json" with {
type: "json",
};
import amqSongsJson from "../../../static/data/amq-songs.json" with {
type: "json",
};
import { db } from "./index";
import {
animeGenresTable,
animeNamesTable,
animeSongLinksTable,
animeTable,
animeTagsTable,
artistAltNamesTable,
artistGroupsTable,
artistsTable,
genresTable,
groupAltNamesTable,
groupArtistMembersTable,
groupGroupMembersTable,
groupsTable,
songsTable,
tagsTable,
} from "./schema";
/**
* AMQ JSON import routine
*
* Reads JSON from `static/data`:
* - amq-anime.json: array of anime (roughly AmqAnimeSchema, but your local data may include extra fields)
* - amq-songs.json: array of songs (roughly AmqSongSchema, but your local data may include extra fields)
* - amq-artists.json: array of artists (AmqArtistSchema, may include extra fields)
* - amq-groups.json: array of groups (AmqGroupSchema, may include extra fields)
*
* Validates via Zod (with passthrough to tolerate extra properties in JSON),
* then inserts into the normalized Drizzle schema in `src/lib/db/schema/index.ts`.
*
* This file is intended to be runnable in Bun (drizzle-orm/bun-sqlite).
*/
// Validator for amq-anime.json: an array of anime records. `.passthrough()`
// keeps unknown extra keys instead of rejecting them, so local dumps with
// additional fields still validate.
const AnimeArraySchema = z.array(AmqAnimeSchema.passthrough());
/** Options accepted by `importAmqData`. */
type ImportOptions = {
/**
* When true, removes existing AMQ rows before inserting.
* Order matters due to FKs (child tables are deleted before parents).
*/
wipeFirst?: boolean;
/**
* When true, prints progress info to the console. Defaults to true.
*/
verbose?: boolean;
/**
* Batch size used for insert chunks, to avoid SQLite parameter limits.
* Defaults to 500 rows per INSERT.
*/
batchSize?: number;
};
/**
 * Split `items` into consecutive slices of at most `size` elements.
 * A non-positive `size` disables chunking and yields the whole input
 * as a single batch.
 */
function chunk<T>(items: T[], size: number): T[][] {
  if (size <= 0) return [items];
  const batches: T[][] = [];
  let start = 0;
  while (start < items.length) {
    batches.push(items.slice(start, start + size));
    start += size;
  }
  return batches;
}
/**
 * Identity passthrough that erases the static type of an imported JSON
 * module to `unknown`, so callers must re-validate it with Zod instead of
 * trusting the compiler-inferred shape.
 */
const readJsonModule = (value: unknown): unknown => value;
/**
 * Render a compact, human-readable summary of a ZodError: up to 50 issues
 * as "- path: message" bullet lines under `prefix`, plus a trailing count
 * of any issues beyond the cap.
 */
function zodErrorSummary(prefix: string, err: z.ZodError): string {
  const MAX_SHOWN = 50;
  const bullets = err.issues
    .slice(0, MAX_SHOWN)
    .map((issue) => {
      // Empty path means the problem is at the top level of the payload.
      const where = issue.path.length > 0 ? issue.path.join(".") : "(root)";
      return `- ${where}: ${issue.message}`;
    })
    .join("\n");
  const hidden = err.issues.length - MAX_SHOWN;
  const more = hidden > 0 ? `\n...and ${hidden} more issues` : "";
  return `${prefix}\n${bullets}${more}`;
}
/**
 * Normalize a category "number" (which upstream may supply as number or
 * string) to its text form for storage; null and undefined pass through
 * as null.
 */
function categoryNumberToText(
  v: number | string | null | undefined,
): string | null {
  // `== null` deliberately matches both null and undefined.
  return v == null ? null : String(v);
}
/**
* Artists/groups may include extra fields in JSON; we validate the required
* subset and let `.passthrough()` carry the rest through (the insert code
* below only reads the validated fields).
*/
const ArtistSubsetSchema = AmqArtistSchema.passthrough();
/**
* Your `songs` schema uses `annSongId` as PK and stores `songId` as non-unique.
* JSON may contain nested `artist` / `composer` / `arranger` objects; we ignore them
* (only annSongId/songId/name/category are inserted).
*/
const SongSubsetSchema = AmqSongSchema.passthrough();
/**
* Your `anime` schema expects IDs that the upstream Zod schema requires.
* Your local `amq-anime.json` sample contains `watched: false` and (in the snippet) is missing aniListId/malId/kitsuId/genres/tags.
*
* We validate with the strict schema (with `.passthrough()` so extra keys are allowed).
* If your JSON truly lacks required keys, validation will fail with a clear message.
*/
export async function importAmqData(
options: ImportOptions = {},
): Promise<void> {
const { wipeFirst = false, verbose = true, batchSize = 500 } = options;
const animeJson = readJsonModule(amqAnimeJson);
const songsJson = readJsonModule(amqSongsJson);
const artistsJson = readJsonModule(amqArtistsJson);
const groupsJson = readJsonModule(amqGroupsJson);
const animeParsed = AnimeArraySchema.safeParse(animeJson);
if (!animeParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid anime JSON (static/data/amq-anime.json)`,
animeParsed.error,
),
);
}
const songsParsed = z.array(SongSubsetSchema).safeParse(songsJson);
if (!songsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid songs JSON (static/data/amq-songs.json)`,
songsParsed.error,
),
);
}
const artistsParsed = z.array(ArtistSubsetSchema).safeParse(artistsJson);
if (!artistsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid artists JSON (static/data/amq-artists.json)`,
artistsParsed.error,
),
);
}
const groupsParsed = z
.array(AmqGroupSchema.passthrough())
.safeParse(groupsJson);
if (!groupsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid groups JSON (static/data/amq-groups.json)`,
groupsParsed.error,
),
);
}
const anime = animeParsed.data;
const songs = songsParsed.data;
const artists = artistsParsed.data;
const groups = groupsParsed.data;
if (verbose) {
// eslint-disable-next-line no-console
console.info(
`AMQ import: ${anime.length} anime, ${songs.length} songs, ${artists.length} artists, ${groups.length} groups`,
);
}
// 2) Apply inserts in a transaction
db.transaction(() => {
if (wipeFirst) {
// Child tables first, then parents (respect FKs)
db.delete(animeSongLinksTable).run();
db.delete(animeNamesTable).run();
db.delete(animeGenresTable).run();
db.delete(animeTagsTable).run();
// artist/group graph tables
db.delete(groupGroupMembersTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(artistGroupsTable).run();
db.delete(groupAltNamesTable).run();
db.delete(artistAltNamesTable).run();
db.delete(animeTable).run();
// songs referenced by anime_song_links; delete after links wiped
db.delete(songsTable).run();
// groups referenced by graph tables
db.delete(groupsTable).run();
// artists referenced by graph tables
db.delete(artistsTable).run();
}
// 2a) Insert artists (core)
{
const artistRows = artists.map((a) => ({
songArtistId: a.songArtistId,
name: a.name,
}));
for (const batch of chunk(artistRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistsTable)
.values(batch)
.onConflictDoUpdate({
target: artistsTable.songArtistId,
set: {
name: artistsTable.name,
},
})
.run();
}
}
// 2a.1) Insert groups (core)
{
const groupRows = groups.map((g) => ({
songGroupId: g.songGroupId,
name: g.name,
}));
for (const batch of chunk(groupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupsTable)
.values(batch)
.onConflictDoUpdate({
target: groupsTable.songGroupId,
set: {
name: groupsTable.name,
},
})
.run();
}
}
// 2a.2) Insert memberships + alt names
// For these join tables, easiest/most consistent is replace-all (delete then insert),
// since theyre derived arrays and can change over time.
{
db.delete(artistGroupsTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(groupGroupMembersTable).run();
db.delete(artistAltNamesTable).run();
db.delete(groupAltNamesTable).run();
// artist -> groups
const artistGroupRows = artists.flatMap((a) =>
a.inGroups.map((songGroupId) => ({
songArtistId: a.songArtistId,
songGroupId,
})),
);
for (const batch of chunk(artistGroupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistGroupsTable).values(batch).run();
}
// group -> artist members
const groupArtistMemberRows = groups.flatMap((g) =>
g.artistMembers.map((songArtistId) => ({
songGroupId: g.songGroupId,
songArtistId,
})),
);
for (const batch of chunk(groupArtistMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupArtistMembersTable).values(batch).run();
}
// group -> group members
const groupGroupMemberRows = groups.flatMap((g) =>
g.groupMembers.map((memberSongGroupId) => ({
songGroupId: g.songGroupId,
memberSongGroupId,
})),
);
for (const batch of chunk(groupGroupMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupGroupMembersTable).values(batch).run();
}
// artist alt names: { songArtistId, name } (stored as altSongArtistId in DB)
const artistAltNameRows = artists.flatMap((a) =>
a.altNames.map((alt) => ({
songArtistId: a.songArtistId,
altSongArtistId: alt.songArtistId,
name: alt.name,
})),
);
for (const batch of chunk(artistAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistAltNamesTable).values(batch).run();
}
// group alt names: { songGroupId, name } where object.songGroupId is the context
const groupAltNameRows = groups.flatMap((g) =>
g.altNames.map((alt) => ({
songGroupId: g.songGroupId,
contextSongGroupId: alt.songGroupId,
name: alt.name,
})),
);
for (const batch of chunk(groupAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupAltNamesTable).values(batch).run();
}
}
// 2b) Insert songs
{
const songRows = songs.map((s) => ({
annSongId: s.annSongId,
songId: s.songId,
name: s.name,
category: s.category,
}));
for (const batch of chunk(songRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(songsTable)
.values(batch)
.onConflictDoUpdate({
target: songsTable.annSongId,
set: {
songId: songsTable.songId,
name: songsTable.name,
category: songsTable.category,
},
})
.run();
}
}
// 2c) Insert anime and its normalized children
{
const animeRows = anime.map((a) => ({
annId: a.annId,
aniListId: a.aniListId,
malId: a.malId,
kitsuId: a.kitsuId,
categoryName: a.category.name,
categoryNumber: categoryNumberToText(a.category.number),
mainName: a.mainName,
mainNameEn: a.mainNames.EN,
mainNameJa: a.mainNames.JA,
year: a.year,
seasonId: a.seasonId,
opCount: a.opCount,
edCount: a.edCount,
insertCount: a.insertCount,
}));
for (const batch of chunk(animeRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(animeTable)
.values(batch)
.onConflictDoUpdate({
target: animeTable.annId,
set: {
aniListId: animeTable.aniListId,
malId: animeTable.malId,
kitsuId: animeTable.kitsuId,
categoryName: animeTable.categoryName,
categoryNumber: animeTable.categoryNumber,
mainName: animeTable.mainName,
mainNameEn: animeTable.mainNameEn,
mainNameJa: animeTable.mainNameJa,
year: animeTable.year,
seasonId: animeTable.seasonId,
opCount: animeTable.opCount,
edCount: animeTable.edCount,
insertCount: animeTable.insertCount,
},
})
.run();
}
// For child tables, simplest is: delete existing for these annIds then insert fresh.
// (Safer than trying to upsert composite unique constraints for each child row.)
for (const a of anime) {
db.delete(animeNamesTable)
.where(eq(animeNamesTable.annId, a.annId))
.run();
db.delete(animeGenresTable)
.where(eq(animeGenresTable.annId, a.annId))
.run();
db.delete(animeTagsTable)
.where(eq(animeTagsTable.annId, a.annId))
.run();
db.delete(animeSongLinksTable)
.where(eq(animeSongLinksTable.annId, a.annId))
.run();
// names
if (a.names.length) {
db.insert(animeNamesTable)
.values(
a.names.map((n) => ({
annId: a.annId,
language: n.language,
name: n.name,
})),
)
.run();
}
// genres
if (a.genres.length) {
// Ensure lookup rows exist (string PK)
db.insert(genresTable)
.values(a.genres.map((g) => ({ name: g })))
.onConflictDoNothing()
.run();
// Insert relations
db.insert(animeGenresTable)
.values(
a.genres.map((g) => ({
annId: a.annId,
genreName: g,
})),
)
.run();
}
// tags
if (a.tags.length) {
// Ensure lookup rows exist (string PK)
db.insert(tagsTable)
.values(a.tags.map((t) => ({ name: t })))
.onConflictDoNothing()
.run();
// Insert relations
db.insert(animeTagsTable)
.values(
a.tags.map((t) => ({
annId: a.annId,
tagName: t,
})),
)
.run();
}
// song links
if (a.songLinks.length) {
db.insert(animeSongLinksTable)
.values(
a.songLinks.map((l) => ({
annId: a.annId,
annSongId: l.annSongId,
type: l.type,
number: l.number,
uploaded: l.uploaded,
rebroadcast: l.rebroadcast,
dub: l.dub,
})),
)
.run();
}
}
}
});
}
/**
 * Convenience CLI entrypoint:
 * bun run src/lib/db/import-amq.ts
 *
 * Flags: --wipe (delete existing AMQ rows first), --quiet (suppress progress)
 *
 * Requires:
 * - DB_FILE_NAME set in env (see drizzle.config.ts and src/lib/db/index.ts)
 */
if (import.meta.main) {
  const args = process.argv;
  await importAmqData({
    wipeFirst: args.includes("--wipe"),
    verbose: !args.includes("--quiet"),
  });
  // eslint-disable-next-line no-console
  console.info("AMQ import complete");
}