From e24fb58d4e7e3b69485e55c944c3a2fd587be4aa Mon Sep 17 00:00:00 2001
From: Yuri Tatishchev
Date: Thu, 5 Feb 2026 00:32:45 -0800
Subject: [PATCH] db: amq json import script

---
 package.json             |   1 +
 src/lib/db/import-amq.ts | 512 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 513 insertions(+)
 create mode 100644 src/lib/db/import-amq.ts

diff --git a/package.json b/package.json
index 2e3df4b..43365e8 100644
--- a/package.json
+++ b/package.json
@@ -7,6 +7,7 @@
 		"dev": "vite dev",
 		"build": "vite build",
 		"preview": "bun run build && wrangler dev",
+		"db:import": "bun run src/lib/db/import-amq.ts",
 		"format": "biome check --write",
 		"prepare": "svelte-kit sync || echo ''",
 		"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
diff --git a/src/lib/db/import-amq.ts b/src/lib/db/import-amq.ts
new file mode 100644
index 0000000..fa8c8c2
--- /dev/null
+++ b/src/lib/db/import-amq.ts
@@ -0,0 +1,512 @@
+import { type AnyColumn, eq, type SQL, sql } from "drizzle-orm";
+import { z } from "zod";
+import {
+	AmqAnimeSchema,
+	AmqArtistSchema,
+	AmqGroupSchema,
+	AmqSongSchema,
+} from "$lib/types/amq";
+import amqAnimeJson from "../../../static/data/amq-anime.json" with {
+	type: "json",
+};
+import amqArtistsJson from "../../../static/data/amq-artists.json" with {
+	type: "json",
+};
+import amqGroupsJson from "../../../static/data/amq-groups.json" with {
+	type: "json",
+};
+import amqSongsJson from "../../../static/data/amq-songs.json" with {
+	type: "json",
+};
+import { db } from "./index";
+import {
+	animeGenresTable,
+	animeNamesTable,
+	animeSongLinksTable,
+	animeTable,
+	animeTagsTable,
+	artistAltNamesTable,
+	artistGroupsTable,
+	artistsTable,
+	groupAltNamesTable,
+	groupArtistMembersTable,
+	groupGroupMembersTable,
+	groupsTable,
+	songsTable,
+} from "./schema";
+
+/**
+ * AMQ JSON import routine
+ *
+ * Reads JSON from `static/data`:
+ * - amq-anime.json: array of anime (roughly AmqAnimeSchema, but your local data may include extra fields)
+ * - amq-songs.json: array of songs (roughly AmqSongSchema, but your local data may include extra fields)
+ * - amq-artists.json: array of artists (AmqArtistSchema, may include extra fields)
+ * - amq-groups.json: array of groups (AmqGroupSchema, may include extra fields)
+ *
+ * Validates via Zod (with passthrough so extra properties in JSON are kept),
+ * then inserts into the normalized Drizzle schema in `src/lib/db/schema/index.ts`.
+ *
+ * This file is intended to be runnable in Bun (drizzle-orm/bun-sqlite).
+ */
+
+/**
+ * Array schemas for the four input files. `.passthrough()` keeps unknown keys
+ * on the parsed objects instead of stripping them, so extra fields in the JSON
+ * are tolerated; missing required keys still fail validation.
+ */
+const AnimeArraySchema = z.array(AmqAnimeSchema.passthrough());
+const SongArraySchema = z.array(AmqSongSchema.passthrough());
+const ArtistArraySchema = z.array(AmqArtistSchema.passthrough());
+const GroupArraySchema = z.array(AmqGroupSchema.passthrough());
+
+/** Options accepted by `importAmqData`. */
+export type ImportOptions = {
+	/**
+	 * When true, removes existing AMQ rows before inserting.
+	 * Order matters due to FKs.
+	 */
+	wipeFirst?: boolean;
+
+	/**
+	 * When true, prints progress info.
+	 */
+	verbose?: boolean;
+
+	/**
+	 * Batch size used for insert chunks, to avoid SQLite parameter limits.
+	 */
+	batchSize?: number;
+};
+
+/**
+ * Splits `items` into consecutive slices of at most `size` elements.
+ *
+ * A `size` below 1 (or NaN) disables chunking: the whole input is returned as
+ * a single chunk, which may be empty.
+ */
+function chunk<T>(items: T[], size: number): T[][] {
+	const step = Math.floor(size);
+	if (!(step >= 1)) return [items];
+	const out: T[][] = [];
+	for (let i = 0; i < items.length; i += step) {
+		out.push(items.slice(i, i + step));
+	}
+	return out;
+}
+
+/**
+ * Calls `run` once per non-empty chunk of `rows`, in order. Empty chunks are
+ * skipped so `run` never has to insert zero rows.
+ */
+function forEachBatch<T>(
+	rows: T[],
+	size: number,
+	run: (batch: T[]) => void,
+): void {
+	for (const batch of chunk(rows, size)) {
+		if (batch.length > 0) run(batch);
+	}
+}
+
+/**
+ * Widens a statically imported JSON module to `unknown`, so its shape comes
+ * from Zod validation rather than from the type inferred for the import.
+ */
+function readJsonModule(value: unknown): unknown {
+	return value;
+}
+
+/** Maximum number of individual Zod issues listed in one error message. */
+const MAX_REPORTED_ISSUES = 50;
+
+/**
+ * Formats a Zod failure as `prefix` followed by one `- path: message` line per
+ * issue, listing at most `MAX_REPORTED_ISSUES` and summarising the remainder.
+ */
+function zodErrorSummary(prefix: string, err: z.ZodError): string {
+	const lines = err.issues.slice(0, MAX_REPORTED_ISSUES).map((issue) => {
+		const p = issue.path.length ? issue.path.join(".") : "(root)";
+		return `- ${p}: ${issue.message}`;
+	});
+	const hidden = err.issues.length - MAX_REPORTED_ISSUES;
+	const more = hidden > 0 ? `\n...and ${hidden} more issues` : "";
+	return `${prefix}\n${lines.join("\n")}${more}`;
+}
+
+/**
+ * Validates `value` against `schema` and returns the parsed data.
+ *
+ * @throws Error whose message is `label` plus a summary of the Zod issues.
+ */
+function parseOrThrow<S extends z.ZodTypeAny>(
+	schema: S,
+	value: unknown,
+	label: string,
+): z.infer<S> {
+	const result = schema.safeParse(value);
+	if (!result.success) {
+		throw new Error(zodErrorSummary(label, result.error));
+	}
+	return result.data;
+}
+
+/**
+ * Converts an anime category number to its string form; `null` and
+ * `undefined` both become `null`.
+ */
+function categoryNumberToText(
+	v: number | string | null | undefined,
+): string | null {
+	return v == null ? null : String(v);
+}
+
+/**
+ * References the incoming value of `column` inside `ON CONFLICT DO UPDATE`.
+ *
+ * Assigning the column object itself would render `SET col = col`, which
+ * keeps the stored value; SQLite exposes the row that failed to insert as
+ * `excluded`, so `excluded."col"` is what actually applies the new data.
+ */
+function excluded(column: AnyColumn): SQL {
+	return sql.raw(`excluded."${column.name}"`);
+}
+
+/**
+ * Validates the four bundled AMQ JSON files, then writes them to the database
+ * inside a single transaction.
+ *
+ * Core rows (artists, groups, songs, anime) are upserted on their id column.
+ * Artist/group join and alt-name tables are cleared and rebuilt in full; anime
+ * child rows (names, genres, tags, song links) are replaced per imported anime.
+ *
+ * @param options See `ImportOptions`; defaults are `wipeFirst: false`,
+ *   `verbose: true`, `batchSize: 500`.
+ * @throws Error if any file fails validation; nothing has been written at
+ *   that point. Errors raised by the database while writing also propagate.
+ */
+export async function importAmqData(
+	options: ImportOptions = {},
+): Promise<void> {
+	const { wipeFirst = false, verbose = true, batchSize = 500 } = options;
+
+	// 1) Validate all inputs up front, in a fixed order, before touching the DB
+	const anime = parseOrThrow(
+		AnimeArraySchema,
+		readJsonModule(amqAnimeJson),
+		"Invalid anime JSON (static/data/amq-anime.json)",
+	);
+	const songs = parseOrThrow(
+		SongArraySchema,
+		readJsonModule(amqSongsJson),
+		"Invalid songs JSON (static/data/amq-songs.json)",
+	);
+	const artists = parseOrThrow(
+		ArtistArraySchema,
+		readJsonModule(amqArtistsJson),
+		"Invalid artists JSON (static/data/amq-artists.json)",
+	);
+	const groups = parseOrThrow(
+		GroupArraySchema,
+		readJsonModule(amqGroupsJson),
+		"Invalid groups JSON (static/data/amq-groups.json)",
+	);
+
+	if (verbose) {
+		// eslint-disable-next-line no-console
+		console.info(
+			`AMQ import: ${anime.length} anime, ${songs.length} songs, ${artists.length} artists, ${groups.length} groups`,
+		);
+	}
+
+	// 2) Apply inserts in a transaction. Every statement goes through `tx` so
+	// it is tied to the transaction rather than to the shared `db` handle.
+	db.transaction((tx) => {
+		if (wipeFirst) {
+			// Child tables first, then parents (respect FKs)
+			tx.delete(animeSongLinksTable).run();
+			tx.delete(animeNamesTable).run();
+			tx.delete(animeGenresTable).run();
+			tx.delete(animeTagsTable).run();
+
+			// artist/group graph tables
+			tx.delete(groupGroupMembersTable).run();
+			tx.delete(groupArtistMembersTable).run();
+			tx.delete(artistGroupsTable).run();
+			tx.delete(groupAltNamesTable).run();
+			tx.delete(artistAltNamesTable).run();
+
+			tx.delete(animeTable).run();
+
+			// songs referenced by anime_song_links; delete after links wiped
+			tx.delete(songsTable).run();
+
+			// groups referenced by graph tables
+			tx.delete(groupsTable).run();
+
+			// artists referenced by graph tables
+			tx.delete(artistsTable).run();
+		}
+
+		// 2a) Insert artists (core)
+		forEachBatch(
+			artists.map((a) => ({ songArtistId: a.songArtistId, name: a.name })),
+			batchSize,
+			(batch) => {
+				tx.insert(artistsTable)
+					.values(batch)
+					.onConflictDoUpdate({
+						target: artistsTable.songArtistId,
+						set: { name: excluded(artistsTable.name) },
+					})
+					.run();
+			},
+		);
+
+		// 2a.1) Insert groups (core)
+		forEachBatch(
+			groups.map((g) => ({ songGroupId: g.songGroupId, name: g.name })),
+			batchSize,
+			(batch) => {
+				tx.insert(groupsTable)
+					.values(batch)
+					.onConflictDoUpdate({
+						target: groupsTable.songGroupId,
+						set: { name: excluded(groupsTable.name) },
+					})
+					.run();
+			},
+		);
+
+		// 2a.2) Insert memberships + alt names
+		// For these join tables, easiest/most consistent is replace-all (delete then insert),
+		// since they're derived arrays and can change over time.
+		tx.delete(artistGroupsTable).run();
+		tx.delete(groupArtistMembersTable).run();
+		tx.delete(groupGroupMembersTable).run();
+		tx.delete(artistAltNamesTable).run();
+		tx.delete(groupAltNamesTable).run();
+
+		// artist -> groups
+		forEachBatch(
+			artists.flatMap((a) =>
+				a.inGroups.map((songGroupId) => ({
+					songArtistId: a.songArtistId,
+					songGroupId,
+				})),
+			),
+			batchSize,
+			(batch) => {
+				tx.insert(artistGroupsTable).values(batch).run();
+			},
+		);
+
+		// group -> artist members
+		forEachBatch(
+			groups.flatMap((g) =>
+				g.artistMembers.map((songArtistId) => ({
+					songGroupId: g.songGroupId,
+					songArtistId,
+				})),
+			),
+			batchSize,
+			(batch) => {
+				tx.insert(groupArtistMembersTable).values(batch).run();
+			},
+		);
+
+		// group -> group members
+		forEachBatch(
+			groups.flatMap((g) =>
+				g.groupMembers.map((memberSongGroupId) => ({
+					songGroupId: g.songGroupId,
+					memberSongGroupId,
+				})),
+			),
+			batchSize,
+			(batch) => {
+				tx.insert(groupGroupMembersTable).values(batch).run();
+			},
+		);
+
+		// artist alt names: { songArtistId, name } (stored as altSongArtistId in DB)
+		forEachBatch(
+			artists.flatMap((a) =>
+				a.altNames.map((alt) => ({
+					songArtistId: a.songArtistId,
+					altSongArtistId: alt.songArtistId,
+					name: alt.name,
+				})),
+			),
+			batchSize,
+			(batch) => {
+				tx.insert(artistAltNamesTable).values(batch).run();
+			},
+		);
+
+		// group alt names: { songGroupId, name } where object.songGroupId is the context
+		forEachBatch(
+			groups.flatMap((g) =>
+				g.altNames.map((alt) => ({
+					songGroupId: g.songGroupId,
+					contextSongGroupId: alt.songGroupId,
+					name: alt.name,
+				})),
+			),
+			batchSize,
+			(batch) => {
+				tx.insert(groupAltNamesTable).values(batch).run();
+			},
+		);
+
+		// 2b) Insert songs
+		forEachBatch(
+			songs.map((s) => ({
+				annSongId: s.annSongId,
+				songId: s.songId,
+				name: s.name,
+				category: s.category,
+			})),
+			batchSize,
+			(batch) => {
+				tx.insert(songsTable)
+					.values(batch)
+					.onConflictDoUpdate({
+						target: songsTable.annSongId,
+						set: {
+							songId: excluded(songsTable.songId),
+							name: excluded(songsTable.name),
+							category: excluded(songsTable.category),
+						},
+					})
+					.run();
+			},
+		);
+
+		// 2c) Insert anime and its normalized children
+		forEachBatch(
+			anime.map((a) => ({
+				annId: a.annId,
+				aniListId: a.aniListId,
+				malId: a.malId,
+				kitsuId: a.kitsuId,
+				categoryName: a.category.name,
+				categoryNumber: categoryNumberToText(a.category.number),
+				mainName: a.mainName,
+				mainNameEn: a.mainNames.EN,
+				mainNameJa: a.mainNames.JA,
+				year: a.year,
+				seasonId: a.seasonId,
+				opCount: a.opCount,
+				edCount: a.edCount,
+				insertCount: a.insertCount,
+			})),
+			batchSize,
+			(batch) => {
+				tx.insert(animeTable)
+					.values(batch)
+					.onConflictDoUpdate({
+						target: animeTable.annId,
+						set: {
+							aniListId: excluded(animeTable.aniListId),
+							malId: excluded(animeTable.malId),
+							kitsuId: excluded(animeTable.kitsuId),
+							categoryName: excluded(animeTable.categoryName),
+							categoryNumber: excluded(animeTable.categoryNumber),
+							mainName: excluded(animeTable.mainName),
+							mainNameEn: excluded(animeTable.mainNameEn),
+							mainNameJa: excluded(animeTable.mainNameJa),
+							year: excluded(animeTable.year),
+							seasonId: excluded(animeTable.seasonId),
+							opCount: excluded(animeTable.opCount),
+							edCount: excluded(animeTable.edCount),
+							insertCount: excluded(animeTable.insertCount),
+						},
+					})
+					.run();
+			},
+		);
+
+		// For child tables, simplest is: delete existing for these annIds then insert fresh.
+		// (Safer than trying to upsert composite unique constraints for each child row.)
+		for (const a of anime) {
+			tx.delete(animeNamesTable)
+				.where(eq(animeNamesTable.annId, a.annId))
+				.run();
+			tx.delete(animeGenresTable)
+				.where(eq(animeGenresTable.annId, a.annId))
+				.run();
+			tx.delete(animeTagsTable)
+				.where(eq(animeTagsTable.annId, a.annId))
+				.run();
+			tx.delete(animeSongLinksTable)
+				.where(eq(animeSongLinksTable.annId, a.annId))
+				.run();
+
+			// names
+			forEachBatch(
+				a.names.map((n) => ({
+					annId: a.annId,
+					language: n.language,
+					name: n.name,
+				})),
+				batchSize,
+				(batch) => {
+					tx.insert(animeNamesTable).values(batch).run();
+				},
+			);
+
+			// genres
+			forEachBatch(
+				a.genres.map((genre) => ({ annId: a.annId, genre })),
+				batchSize,
+				(batch) => {
+					tx.insert(animeGenresTable).values(batch).run();
+				},
+			);
+
+			// tags
+			forEachBatch(
+				a.tags.map((tag) => ({ annId: a.annId, tag })),
+				batchSize,
+				(batch) => {
+					tx.insert(animeTagsTable).values(batch).run();
+				},
+			);
+
+			// song links
+			forEachBatch(
+				a.songLinks.map((l) => ({
+					annId: a.annId,
+					annSongId: l.annSongId,
+					type: l.type,
+					number: l.number,
+					uploaded: l.uploaded,
+					rebroadcast: l.rebroadcast,
+					dub: l.dub,
+				})),
+				batchSize,
+				(batch) => {
+					tx.insert(animeSongLinksTable).values(batch).run();
+				},
+			);
+		}
+	});
+}
+
+/**
+ * Convenience CLI entrypoint:
+ *   bun run src/lib/db/import-amq.ts [--wipe] [--quiet]
+ *
+ * Requires:
+ * - DB_FILE_NAME set in env (see drizzle.config.ts and src/lib/db/index.ts)
+ */
+if (import.meta.main) {
+	const wipeFirst = process.argv.includes("--wipe");
+	const quiet = process.argv.includes("--quiet");
+
+	await importAmqData({ wipeFirst, verbose: !quiet });
+
+	// eslint-disable-next-line no-console
+	console.info("AMQ import complete");
+}