db: amq json import script

This commit is contained in:
2026-02-05 00:32:45 -08:00
parent c725453e8a
commit e24fb58d4e
2 changed files with 490 additions and 0 deletions

489
src/lib/db/import-amq.ts Normal file
View File

@@ -0,0 +1,489 @@
import { eq, sql } from "drizzle-orm";
import { z } from "zod";
import {
AmqAnimeSchema,
AmqArtistSchema,
AmqGroupSchema,
AmqSongSchema,
} from "$lib/types/amq";
import amqAnimeJson from "../../../static/data/amq-anime.json" with {
type: "json",
};
import amqArtistsJson from "../../../static/data/amq-artists.json" with {
type: "json",
};
import amqGroupsJson from "../../../static/data/amq-groups.json" with {
type: "json",
};
import amqSongsJson from "../../../static/data/amq-songs.json" with {
type: "json",
};
import { db } from "./index";
import {
animeGenresTable,
animeNamesTable,
animeSongLinksTable,
animeTable,
animeTagsTable,
artistAltNamesTable,
artistGroupsTable,
artistsTable,
groupAltNamesTable,
groupArtistMembersTable,
groupGroupMembersTable,
groupsTable,
songsTable,
} from "./schema";
/**
* AMQ JSON import routine
*
* Reads JSON from `static/data`:
* - amq-anime.json: array of anime (roughly AmqAnimeSchema, but your local data may include extra fields)
* - amq-songs.json: array of songs (roughly AmqSongSchema, but your local data may include extra fields)
* - amq-artists.json: array of artists (AmqArtistSchema, may include extra fields)
* - amq-groups.json: array of groups (AmqGroupSchema, may include extra fields)
*
* Validates via Zod (with passthrough to tolerate extra properties in JSON),
* then inserts into the normalized Drizzle schema in `src/lib/db/schema/index.ts`.
*
* This file is intended to be runnable in Bun (drizzle-orm/bun-sqlite).
*/
/** Array schema for `amq-anime.json`; `.passthrough()` tolerates extra keys in local data. */
const AnimeArraySchema = z.array(AmqAnimeSchema.passthrough());
/** Options accepted by {@link importAmqData}. */
type ImportOptions = {
  /**
   * When true, removes existing AMQ rows before inserting.
   * Order matters due to FKs.
   */
  wipeFirst?: boolean;
  /**
   * When true, prints progress info.
   */
  verbose?: boolean;
  /**
   * Batch size used for insert chunks, to avoid SQLite parameter limits.
   */
  batchSize?: number;
};
/**
 * Splits `items` into consecutive slices of at most `size` elements.
 * A non-positive `size` yields a single slice containing every item.
 */
function chunk<T>(items: T[], size: number): T[][] {
  if (size <= 0) return [items];
  const batches: T[][] = [];
  let start = 0;
  while (start < items.length) {
    batches.push(items.slice(start, start + size));
    start += size;
  }
  return batches;
}
/**
 * Widens a JSON module's inferred type to `unknown` so callers are forced to
 * validate it with Zod before use. Purely a typing aid: returns its argument
 * unchanged at runtime.
 */
function readJsonModule(value: unknown): unknown {
  const widened: unknown = value;
  return widened;
}
/**
 * Formats a ZodError as `prefix` followed by a bullet list of issues, capped
 * at the first 50 so a badly broken file does not flood the console.
 */
function zodErrorSummary(prefix: string, err: z.ZodError): string {
  const MAX_SHOWN = 50;
  const shown = err.issues
    .slice(0, MAX_SHOWN)
    .map((issue) => {
      const p = issue.path.length ? issue.path.join(".") : "(root)";
      return `- ${p}: ${issue.message}`;
    })
    .join("\n");
  const hiddenCount = err.issues.length - MAX_SHOWN;
  const more = hiddenCount > 0 ? `\n...and ${hiddenCount} more issues` : "";
  return `${prefix}\n${shown}${more}`;
}
/**
 * Normalizes an optional category number — which may arrive as a number or a
 * string — to its string form. `null` and `undefined` both map to `null`.
 */
function categoryNumberToText(
  v: number | string | null | undefined,
): string | null {
  // `== null` deliberately matches both null and undefined.
  if (v == null) return null;
  return `${v}`;
}
/**
 * Artists/groups may include extra fields in JSON; we validate the required subset
 * and ignore the rest via `.passthrough()`.
 */
const ArtistSubsetSchema = AmqArtistSchema.passthrough();
/**
 * Your `songs` schema uses `annSongId` as PK and stores `songId` as non-unique.
 * JSON may contain nested `artist` / `composer` / `arranger` objects; we ignore them.
 * Only the fields mapped at insert time (annSongId, songId, name, category) are used.
 */
const SongSubsetSchema = AmqSongSchema.passthrough();
/**
* Your `anime` schema expects IDs that the upstream Zod schema requires.
* Your local `amq-anime.json` sample contains `watched: false` and (in the snippet) is missing aniListId/malId/kitsuId/genres/tags.
*
* We validate with the strict schema (with `.passthrough()` so extra keys are allowed).
* If your JSON truly lacks required keys, validation will fail with a clear message.
*/
export async function importAmqData(
options: ImportOptions = {},
): Promise<void> {
const { wipeFirst = false, verbose = true, batchSize = 500 } = options;
const animeJson = readJsonModule(amqAnimeJson);
const songsJson = readJsonModule(amqSongsJson);
const artistsJson = readJsonModule(amqArtistsJson);
const groupsJson = readJsonModule(amqGroupsJson);
const animeParsed = AnimeArraySchema.safeParse(animeJson);
if (!animeParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid anime JSON (static/data/amq-anime.json)`,
animeParsed.error,
),
);
}
const songsParsed = z.array(SongSubsetSchema).safeParse(songsJson);
if (!songsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid songs JSON (static/data/amq-songs.json)`,
songsParsed.error,
),
);
}
const artistsParsed = z.array(ArtistSubsetSchema).safeParse(artistsJson);
if (!artistsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid artists JSON (static/data/amq-artists.json)`,
artistsParsed.error,
),
);
}
const groupsParsed = z
.array(AmqGroupSchema.passthrough())
.safeParse(groupsJson);
if (!groupsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid groups JSON (static/data/amq-groups.json)`,
groupsParsed.error,
),
);
}
const anime = animeParsed.data;
const songs = songsParsed.data;
const artists = artistsParsed.data;
const groups = groupsParsed.data;
if (verbose) {
// eslint-disable-next-line no-console
console.info(
`AMQ import: ${anime.length} anime, ${songs.length} songs, ${artists.length} artists, ${groups.length} groups`,
);
}
// 2) Apply inserts in a transaction
db.transaction(() => {
if (wipeFirst) {
// Child tables first, then parents (respect FKs)
db.delete(animeSongLinksTable).run();
db.delete(animeNamesTable).run();
db.delete(animeGenresTable).run();
db.delete(animeTagsTable).run();
// artist/group graph tables
db.delete(groupGroupMembersTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(artistGroupsTable).run();
db.delete(groupAltNamesTable).run();
db.delete(artistAltNamesTable).run();
db.delete(animeTable).run();
// songs referenced by anime_song_links; delete after links wiped
db.delete(songsTable).run();
// groups referenced by graph tables
db.delete(groupsTable).run();
// artists referenced by graph tables
db.delete(artistsTable).run();
}
// 2a) Insert artists (core)
{
const artistRows = artists.map((a) => ({
songArtistId: a.songArtistId,
name: a.name,
}));
for (const batch of chunk(artistRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistsTable)
.values(batch)
.onConflictDoUpdate({
target: artistsTable.songArtistId,
set: {
name: artistsTable.name,
},
})
.run();
}
}
// 2a.1) Insert groups (core)
{
const groupRows = groups.map((g) => ({
songGroupId: g.songGroupId,
name: g.name,
}));
for (const batch of chunk(groupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupsTable)
.values(batch)
.onConflictDoUpdate({
target: groupsTable.songGroupId,
set: {
name: groupsTable.name,
},
})
.run();
}
}
// 2a.2) Insert memberships + alt names
// For these join tables, easiest/most consistent is replace-all (delete then insert),
// since theyre derived arrays and can change over time.
{
db.delete(artistGroupsTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(groupGroupMembersTable).run();
db.delete(artistAltNamesTable).run();
db.delete(groupAltNamesTable).run();
// artist -> groups
const artistGroupRows = artists.flatMap((a) =>
a.inGroups.map((songGroupId) => ({
songArtistId: a.songArtistId,
songGroupId,
})),
);
for (const batch of chunk(artistGroupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistGroupsTable).values(batch).run();
}
// group -> artist members
const groupArtistMemberRows = groups.flatMap((g) =>
g.artistMembers.map((songArtistId) => ({
songGroupId: g.songGroupId,
songArtistId,
})),
);
for (const batch of chunk(groupArtistMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupArtistMembersTable).values(batch).run();
}
// group -> group members
const groupGroupMemberRows = groups.flatMap((g) =>
g.groupMembers.map((memberSongGroupId) => ({
songGroupId: g.songGroupId,
memberSongGroupId,
})),
);
for (const batch of chunk(groupGroupMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupGroupMembersTable).values(batch).run();
}
// artist alt names: { songArtistId, name } (stored as altSongArtistId in DB)
const artistAltNameRows = artists.flatMap((a) =>
a.altNames.map((alt) => ({
songArtistId: a.songArtistId,
altSongArtistId: alt.songArtistId,
name: alt.name,
})),
);
for (const batch of chunk(artistAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistAltNamesTable).values(batch).run();
}
// group alt names: { songGroupId, name } where object.songGroupId is the context
const groupAltNameRows = groups.flatMap((g) =>
g.altNames.map((alt) => ({
songGroupId: g.songGroupId,
contextSongGroupId: alt.songGroupId,
name: alt.name,
})),
);
for (const batch of chunk(groupAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupAltNamesTable).values(batch).run();
}
}
// 2b) Insert songs
{
const songRows = songs.map((s) => ({
annSongId: s.annSongId,
songId: s.songId,
name: s.name,
category: s.category,
}));
for (const batch of chunk(songRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(songsTable)
.values(batch)
.onConflictDoUpdate({
target: songsTable.annSongId,
set: {
songId: songsTable.songId,
name: songsTable.name,
category: songsTable.category,
},
})
.run();
}
}
// 2c) Insert anime and its normalized children
{
const animeRows = anime.map((a) => ({
annId: a.annId,
aniListId: a.aniListId,
malId: a.malId,
kitsuId: a.kitsuId,
categoryName: a.category.name,
categoryNumber: categoryNumberToText(a.category.number),
mainName: a.mainName,
mainNameEn: a.mainNames.EN,
mainNameJa: a.mainNames.JA,
year: a.year,
seasonId: a.seasonId,
opCount: a.opCount,
edCount: a.edCount,
insertCount: a.insertCount,
}));
for (const batch of chunk(animeRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(animeTable)
.values(batch)
.onConflictDoUpdate({
target: animeTable.annId,
set: {
aniListId: animeTable.aniListId,
malId: animeTable.malId,
kitsuId: animeTable.kitsuId,
categoryName: animeTable.categoryName,
categoryNumber: animeTable.categoryNumber,
mainName: animeTable.mainName,
mainNameEn: animeTable.mainNameEn,
mainNameJa: animeTable.mainNameJa,
year: animeTable.year,
seasonId: animeTable.seasonId,
opCount: animeTable.opCount,
edCount: animeTable.edCount,
insertCount: animeTable.insertCount,
},
})
.run();
}
// For child tables, simplest is: delete existing for these annIds then insert fresh.
// (Safer than trying to upsert composite unique constraints for each child row.)
for (const a of anime) {
db.delete(animeNamesTable)
.where(eq(animeNamesTable.annId, a.annId))
.run();
db.delete(animeGenresTable)
.where(eq(animeGenresTable.annId, a.annId))
.run();
db.delete(animeTagsTable)
.where(eq(animeTagsTable.annId, a.annId))
.run();
db.delete(animeSongLinksTable)
.where(eq(animeSongLinksTable.annId, a.annId))
.run();
// names
if (a.names.length) {
db.insert(animeNamesTable)
.values(
a.names.map((n) => ({
annId: a.annId,
language: n.language,
name: n.name,
})),
)
.run();
}
// genres
if (a.genres.length) {
db.insert(animeGenresTable)
.values(
a.genres.map((g) => ({
annId: a.annId,
genre: g,
})),
)
.run();
}
// tags
if (a.tags.length) {
db.insert(animeTagsTable)
.values(
a.tags.map((t) => ({
annId: a.annId,
tag: t,
})),
)
.run();
}
// song links
if (a.songLinks.length) {
db.insert(animeSongLinksTable)
.values(
a.songLinks.map((l) => ({
annId: a.annId,
annSongId: l.annSongId,
type: l.type,
number: l.number,
uploaded: l.uploaded,
rebroadcast: l.rebroadcast,
dub: l.dub,
})),
)
.run();
}
}
}
});
}
/**
 * Convenience CLI entrypoint:
 *   bun run src/lib/db/import-amq.ts [--wipe] [--quiet]
 *
 * Requires:
 * - DB_FILE_NAME set in env (see drizzle.config.ts and src/lib/db/index.ts)
 */
if (import.meta.main) {
  const args = process.argv;
  await importAmqData({
    wipeFirst: args.includes("--wipe"),
    verbose: !args.includes("--quiet"),
  });
  // eslint-disable-next-line no-console
  console.info("AMQ import complete");
}