db: amq json import script

This commit is contained in:
2026-02-05 00:32:45 -08:00
parent c725453e8a
commit e24fb58d4e
2 changed files with 490 additions and 0 deletions

489
src/lib/db/import-amq.ts Normal file
View File

@@ -0,0 +1,489 @@
import { eq, sql } from "drizzle-orm";
import { z } from "zod";
import {
AmqAnimeSchema,
AmqArtistSchema,
AmqGroupSchema,
AmqSongSchema,
} from "$lib/types/amq";
import amqAnimeJson from "../../../static/data/amq-anime.json" with {
type: "json",
};
import amqArtistsJson from "../../../static/data/amq-artists.json" with {
type: "json",
};
import amqGroupsJson from "../../../static/data/amq-groups.json" with {
type: "json",
};
import amqSongsJson from "../../../static/data/amq-songs.json" with {
type: "json",
};
import { db } from "./index";
import {
animeGenresTable,
animeNamesTable,
animeSongLinksTable,
animeTable,
animeTagsTable,
artistAltNamesTable,
artistGroupsTable,
artistsTable,
groupAltNamesTable,
groupArtistMembersTable,
groupGroupMembersTable,
groupsTable,
songsTable,
} from "./schema";
/**
* AMQ JSON import routine
*
* Reads JSON from `static/data`:
* - amq-anime.json: array of anime (roughly AmqAnimeSchema, but your local data may include extra fields)
* - amq-songs.json: array of songs (roughly AmqSongSchema, but your local data may include extra fields)
* - amq-artists.json: array of artists (AmqArtistSchema, may include extra fields)
* - amq-groups.json: array of groups (AmqGroupSchema, may include extra fields)
*
* Validates via Zod (with passthrough to tolerate extra properties in JSON),
* then inserts into the normalized Drizzle schema in `src/lib/db/schema/index.ts`.
*
* This file is intended to be runnable in Bun (drizzle-orm/bun-sqlite).
*/
/** Array schema for `amq-anime.json`; `.passthrough()` tolerates extra keys in local data. */
const AnimeArraySchema = z.array(AmqAnimeSchema.passthrough());
/** Options accepted by {@link importAmqData}. */
type ImportOptions = {
  /**
   * When true, removes existing AMQ rows before inserting.
   * Order matters due to FKs.
   */
  wipeFirst?: boolean;
  /**
   * When true, prints progress info.
   */
  verbose?: boolean;
  /**
   * Batch size used for insert chunks, to avoid SQLite parameter limits.
   */
  batchSize?: number;
};
/**
 * Splits `items` into consecutive slices of at most `size` elements.
 * A non-positive `size` yields a single slice containing every item.
 */
function chunk<T>(items: T[], size: number): T[][] {
  if (size <= 0) return [items];
  const batches: T[][] = [];
  let start = 0;
  while (start < items.length) {
    batches.push(items.slice(start, start + size));
    start += size;
  }
  return batches;
}
/**
 * Widens a JSON module's inferred type to `unknown` so callers are forced to
 * validate it with Zod before use. Purely a typing aid: returns its argument
 * unchanged at runtime.
 */
function readJsonModule(value: unknown): unknown {
  const widened: unknown = value;
  return widened;
}
/**
 * Formats a ZodError as `prefix` followed by a bullet list of issues, capped
 * at the first 50 so a badly broken file does not flood the console.
 */
function zodErrorSummary(prefix: string, err: z.ZodError): string {
  const MAX_SHOWN = 50;
  const shown = err.issues
    .slice(0, MAX_SHOWN)
    .map((issue) => {
      const p = issue.path.length ? issue.path.join(".") : "(root)";
      return `- ${p}: ${issue.message}`;
    })
    .join("\n");
  const hiddenCount = err.issues.length - MAX_SHOWN;
  const more = hiddenCount > 0 ? `\n...and ${hiddenCount} more issues` : "";
  return `${prefix}\n${shown}${more}`;
}
/**
 * Normalizes an optional category number — which may arrive as a number or a
 * string — to its string form. `null` and `undefined` both map to `null`.
 */
function categoryNumberToText(
  v: number | string | null | undefined,
): string | null {
  // `== null` deliberately matches both null and undefined.
  if (v == null) return null;
  return `${v}`;
}
/**
 * Artists/groups may include extra fields in JSON; we validate the required subset
 * and ignore the rest via `.passthrough()`.
 */
const ArtistSubsetSchema = AmqArtistSchema.passthrough();
/**
 * Your `songs` schema uses `annSongId` as PK and stores `songId` as non-unique.
 * JSON may contain nested `artist` / `composer` / `arranger` objects; we ignore them.
 * Only the fields mapped at insert time (annSongId, songId, name, category) are used.
 */
const SongSubsetSchema = AmqSongSchema.passthrough();
/**
* Your `anime` schema expects IDs that the upstream Zod schema requires.
* Your local `amq-anime.json` sample contains `watched: false` and (in the snippet) is missing aniListId/malId/kitsuId/genres/tags.
*
* We validate with the strict schema (with `.passthrough()` so extra keys are allowed).
* If your JSON truly lacks required keys, validation will fail with a clear message.
*/
export async function importAmqData(
options: ImportOptions = {},
): Promise<void> {
const { wipeFirst = false, verbose = true, batchSize = 500 } = options;
const animeJson = readJsonModule(amqAnimeJson);
const songsJson = readJsonModule(amqSongsJson);
const artistsJson = readJsonModule(amqArtistsJson);
const groupsJson = readJsonModule(amqGroupsJson);
const animeParsed = AnimeArraySchema.safeParse(animeJson);
if (!animeParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid anime JSON (static/data/amq-anime.json)`,
animeParsed.error,
),
);
}
const songsParsed = z.array(SongSubsetSchema).safeParse(songsJson);
if (!songsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid songs JSON (static/data/amq-songs.json)`,
songsParsed.error,
),
);
}
const artistsParsed = z.array(ArtistSubsetSchema).safeParse(artistsJson);
if (!artistsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid artists JSON (static/data/amq-artists.json)`,
artistsParsed.error,
),
);
}
const groupsParsed = z
.array(AmqGroupSchema.passthrough())
.safeParse(groupsJson);
if (!groupsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid groups JSON (static/data/amq-groups.json)`,
groupsParsed.error,
),
);
}
const anime = animeParsed.data;
const songs = songsParsed.data;
const artists = artistsParsed.data;
const groups = groupsParsed.data;
if (verbose) {
// eslint-disable-next-line no-console
console.info(
`AMQ import: ${anime.length} anime, ${songs.length} songs, ${artists.length} artists, ${groups.length} groups`,
);
}
// 2) Apply inserts in a transaction
db.transaction(() => {
if (wipeFirst) {
// Child tables first, then parents (respect FKs)
db.delete(animeSongLinksTable).run();
db.delete(animeNamesTable).run();
db.delete(animeGenresTable).run();
db.delete(animeTagsTable).run();
// artist/group graph tables
db.delete(groupGroupMembersTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(artistGroupsTable).run();
db.delete(groupAltNamesTable).run();
db.delete(artistAltNamesTable).run();
db.delete(animeTable).run();
// songs referenced by anime_song_links; delete after links wiped
db.delete(songsTable).run();
// groups referenced by graph tables
db.delete(groupsTable).run();
// artists referenced by graph tables
db.delete(artistsTable).run();
}
// 2a) Insert artists (core)
{
const artistRows = artists.map((a) => ({
songArtistId: a.songArtistId,
name: a.name,
}));
for (const batch of chunk(artistRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistsTable)
.values(batch)
.onConflictDoUpdate({
target: artistsTable.songArtistId,
set: {
name: artistsTable.name,
},
})
.run();
}
}
// 2a.1) Insert groups (core)
{
const groupRows = groups.map((g) => ({
songGroupId: g.songGroupId,
name: g.name,
}));
for (const batch of chunk(groupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupsTable)
.values(batch)
.onConflictDoUpdate({
target: groupsTable.songGroupId,
set: {
name: groupsTable.name,
},
})
.run();
}
}
// 2a.2) Insert memberships + alt names
// For these join tables, easiest/most consistent is replace-all (delete then insert),
// since theyre derived arrays and can change over time.
{
db.delete(artistGroupsTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(groupGroupMembersTable).run();
db.delete(artistAltNamesTable).run();
db.delete(groupAltNamesTable).run();
// artist -> groups
const artistGroupRows = artists.flatMap((a) =>
a.inGroups.map((songGroupId) => ({
songArtistId: a.songArtistId,
songGroupId,
})),
);
for (const batch of chunk(artistGroupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistGroupsTable).values(batch).run();
}
// group -> artist members
const groupArtistMemberRows = groups.flatMap((g) =>
g.artistMembers.map((songArtistId) => ({
songGroupId: g.songGroupId,
songArtistId,
})),
);
for (const batch of chunk(groupArtistMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupArtistMembersTable).values(batch).run();
}
// group -> group members
const groupGroupMemberRows = groups.flatMap((g) =>
g.groupMembers.map((memberSongGroupId) => ({
songGroupId: g.songGroupId,
memberSongGroupId,
})),
);
for (const batch of chunk(groupGroupMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupGroupMembersTable).values(batch).run();
}
// artist alt names: { songArtistId, name } (stored as altSongArtistId in DB)
const artistAltNameRows = artists.flatMap((a) =>
a.altNames.map((alt) => ({
songArtistId: a.songArtistId,
altSongArtistId: alt.songArtistId,
name: alt.name,
})),
);
for (const batch of chunk(artistAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistAltNamesTable).values(batch).run();
}
// group alt names: { songGroupId, name } where object.songGroupId is the context
const groupAltNameRows = groups.flatMap((g) =>
g.altNames.map((alt) => ({
songGroupId: g.songGroupId,
contextSongGroupId: alt.songGroupId,
name: alt.name,
})),
);
for (const batch of chunk(groupAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupAltNamesTable).values(batch).run();
}
}
// 2b) Insert songs
{
const songRows = songs.map((s) => ({
annSongId: s.annSongId,
songId: s.songId,
name: s.name,
category: s.category,
}));
for (const batch of chunk(songRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(songsTable)
.values(batch)
.onConflictDoUpdate({
target: songsTable.annSongId,
set: {
songId: songsTable.songId,
name: songsTable.name,
category: songsTable.category,
},
})
.run();
}
}
// 2c) Insert anime and its normalized children
{
const animeRows = anime.map((a) => ({
annId: a.annId,
aniListId: a.aniListId,
malId: a.malId,
kitsuId: a.kitsuId,
categoryName: a.category.name,
categoryNumber: categoryNumberToText(a.category.number),
mainName: a.mainName,
mainNameEn: a.mainNames.EN,
mainNameJa: a.mainNames.JA,
year: a.year,
seasonId: a.seasonId,
opCount: a.opCount,
edCount: a.edCount,
insertCount: a.insertCount,
}));
for (const batch of chunk(animeRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(animeTable)
.values(batch)
.onConflictDoUpdate({
target: animeTable.annId,
set: {
aniListId: animeTable.aniListId,
malId: animeTable.malId,
kitsuId: animeTable.kitsuId,
categoryName: animeTable.categoryName,
categoryNumber: animeTable.categoryNumber,
mainName: animeTable.mainName,
mainNameEn: animeTable.mainNameEn,
mainNameJa: animeTable.mainNameJa,
year: animeTable.year,
seasonId: animeTable.seasonId,
opCount: animeTable.opCount,
edCount: animeTable.edCount,
insertCount: animeTable.insertCount,
},
})
.run();
}
// For child tables, simplest is: delete existing for these annIds then insert fresh.
// (Safer than trying to upsert composite unique constraints for each child row.)
for (const a of anime) {
db.delete(animeNamesTable)
.where(eq(animeNamesTable.annId, a.annId))
.run();
db.delete(animeGenresTable)
.where(eq(animeGenresTable.annId, a.annId))
.run();
db.delete(animeTagsTable)
.where(eq(animeTagsTable.annId, a.annId))
.run();
db.delete(animeSongLinksTable)
.where(eq(animeSongLinksTable.annId, a.annId))
.run();
// names
if (a.names.length) {
db.insert(animeNamesTable)
.values(
a.names.map((n) => ({
annId: a.annId,
language: n.language,
name: n.name,
})),
)
.run();
}
// genres
if (a.genres.length) {
db.insert(animeGenresTable)
.values(
a.genres.map((g) => ({
annId: a.annId,
genre: g,
})),
)
.run();
}
// tags
if (a.tags.length) {
db.insert(animeTagsTable)
.values(
a.tags.map((t) => ({
annId: a.annId,
tag: t,
})),
)
.run();
}
// song links
if (a.songLinks.length) {
db.insert(animeSongLinksTable)
.values(
a.songLinks.map((l) => ({
annId: a.annId,
annSongId: l.annSongId,
type: l.type,
number: l.number,
uploaded: l.uploaded,
rebroadcast: l.rebroadcast,
dub: l.dub,
})),
)
.run();
}
}
}
});
}
/**
 * Convenience CLI entrypoint:
 *   bun run src/lib/db/import-amq.ts [--wipe] [--quiet]
 *
 * Requires:
 * - DB_FILE_NAME set in env (see drizzle.config.ts and src/lib/db/index.ts)
 */
if (import.meta.main) {
  const args = process.argv;
  await importAmqData({
    wipeFirst: args.includes("--wipe"),
    verbose: !args.includes("--quiet"),
  });
  // eslint-disable-next-line no-console
  console.info("AMQ import complete");
}