Files
amqtrain/src/lib/db/import-amq.ts

506 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { eq, sql, type SQL } from "drizzle-orm";
import { z } from "zod";
import {
AmqAnimeSchema,
AmqArtistSchema,
AmqGroupSchema,
AmqSongSchema,
} from "$lib/types/amq";
import amqAnimeJson from "../../../static/data/amq-anime.json" with {
type: "json",
};
import amqArtistsJson from "../../../static/data/amq-artists.json" with {
type: "json",
};
import amqGroupsJson from "../../../static/data/amq-groups.json" with {
type: "json",
};
import amqSongsJson from "../../../static/data/amq-songs.json" with {
type: "json",
};
import { db } from "./index";
import {
animeGenresTable,
animeNamesTable,
animeSongLinksTable,
animeTable,
animeTagsTable,
artistAltNamesTable,
artistGroupsTable,
artistsTable,
genresTable,
groupAltNamesTable,
groupArtistMembersTable,
groupGroupMembersTable,
groupsTable,
songsTable,
tagsTable,
} from "./schema";
/**
* AMQ JSON import routine
*
* Reads JSON from `static/data`:
* - amq-anime.json: array of anime (roughly AmqAnimeSchema, but your local data may include extra fields)
* - amq-songs.json: array of songs (roughly AmqSongSchema, but your local data may include extra fields)
* - amq-artists.json: array of artists (AmqArtistSchema, may include extra fields)
* - amq-groups.json: array of groups (AmqGroupSchema, may include extra fields)
*
* Validates via Zod (with passthrough to tolerate extra properties in JSON),
* then inserts into the normalized Drizzle schema in `src/lib/db/schema/index.ts`.
*
* This file is intended to be runnable in Bun (drizzle-orm/bun-sqlite).
*/
// Validator for amq-anime.json: an array of anime records. `.passthrough()`
// keeps unknown extra keys instead of rejecting them, so local dumps with
// additional fields still validate.
const AnimeArraySchema = z.array(AmqAnimeSchema.passthrough());
/** Options accepted by `importAmqData`. */
type ImportOptions = {
/**
* When true, removes existing AMQ rows before inserting.
* Order matters due to FKs (child tables are deleted before parents).
*/
wipeFirst?: boolean;
/**
* When true, prints progress info to the console. Defaults to true.
*/
verbose?: boolean;
/**
* Batch size used for insert chunks, to avoid SQLite parameter limits.
* Defaults to 500 rows per INSERT.
*/
batchSize?: number;
};
/**
 * Split `items` into consecutive slices of at most `size` elements.
 * A non-positive `size` disables chunking and yields the whole input
 * as a single batch.
 */
function chunk<T>(items: T[], size: number): T[][] {
  if (size <= 0) return [items];
  const batches: T[][] = [];
  let start = 0;
  while (start < items.length) {
    batches.push(items.slice(start, start + size));
    start += size;
  }
  return batches;
}
/**
 * Identity passthrough that erases the static type of an imported JSON
 * module to `unknown`, so callers must re-validate it with Zod instead of
 * trusting the compiler-inferred shape.
 */
const readJsonModule = (value: unknown): unknown => value;
/**
 * Render a compact, human-readable summary of a ZodError: up to 50 issues
 * as "- path: message" bullet lines under `prefix`, plus a trailing count
 * of any issues beyond the cap.
 */
function zodErrorSummary(prefix: string, err: z.ZodError): string {
  const MAX_SHOWN = 50;
  const bullets = err.issues
    .slice(0, MAX_SHOWN)
    .map((issue) => {
      // Empty path means the problem is at the top level of the payload.
      const where = issue.path.length > 0 ? issue.path.join(".") : "(root)";
      return `- ${where}: ${issue.message}`;
    })
    .join("\n");
  const hidden = err.issues.length - MAX_SHOWN;
  const more = hidden > 0 ? `\n...and ${hidden} more issues` : "";
  return `${prefix}\n${bullets}${more}`;
}
/**
 * Normalize a category "number" (which upstream may supply as number or
 * string) to its text form for storage; null and undefined pass through
 * as null.
 */
function categoryNumberToText(
  v: number | string | null | undefined,
): string | null {
  // `== null` deliberately matches both null and undefined.
  return v == null ? null : String(v);
}
/**
* Artists/groups may include extra fields in JSON; we validate the required
* subset and let `.passthrough()` carry the rest through (the insert code
* below only reads the validated fields).
*/
const ArtistSubsetSchema = AmqArtistSchema.passthrough();
/**
* Your `songs` schema uses `annSongId` as PK and stores `songId` as non-unique.
* JSON may contain nested `artist` / `composer` / `arranger` objects; we ignore them
* (only annSongId/songId/name/category are inserted).
*/
const SongSubsetSchema = AmqSongSchema.passthrough();
/**
* Your `anime` schema expects IDs that the upstream Zod schema requires.
* Your local `amq-anime.json` sample contains `watched: false` and (in the snippet) is missing aniListId/malId/kitsuId/genres/tags.
*
* We validate with the strict schema (with `.passthrough()` so extra keys are allowed).
* If your JSON truly lacks required keys, validation will fail with a clear message.
*/
export async function importAmqData(
options: ImportOptions = {},
): Promise<void> {
const { wipeFirst = false, verbose = true, batchSize = 500 } = options;
const animeJson = readJsonModule(amqAnimeJson);
const songsJson = readJsonModule(amqSongsJson);
const artistsJson = readJsonModule(amqArtistsJson);
const groupsJson = readJsonModule(amqGroupsJson);
const animeParsed = AnimeArraySchema.safeParse(animeJson);
if (!animeParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid anime JSON (static/data/amq-anime.json)`,
animeParsed.error,
),
);
}
const songsParsed = z.array(SongSubsetSchema).safeParse(songsJson);
if (!songsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid songs JSON (static/data/amq-songs.json)`,
songsParsed.error,
),
);
}
const artistsParsed = z.array(ArtistSubsetSchema).safeParse(artistsJson);
if (!artistsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid artists JSON (static/data/amq-artists.json)`,
artistsParsed.error,
),
);
}
const groupsParsed = z
.array(AmqGroupSchema.passthrough())
.safeParse(groupsJson);
if (!groupsParsed.success) {
throw new Error(
zodErrorSummary(
`Invalid groups JSON (static/data/amq-groups.json)`,
groupsParsed.error,
),
);
}
const anime = animeParsed.data;
const songs = songsParsed.data;
const artists = artistsParsed.data;
const groups = groupsParsed.data;
if (verbose) {
// eslint-disable-next-line no-console
console.info(
`AMQ import: ${anime.length} anime, ${songs.length} songs, ${artists.length} artists, ${groups.length} groups`,
);
}
// 2) Apply inserts in a transaction
db.transaction(() => {
if (wipeFirst) {
// Child tables first, then parents (respect FKs)
db.delete(animeSongLinksTable).run();
db.delete(animeNamesTable).run();
db.delete(animeGenresTable).run();
db.delete(animeTagsTable).run();
// artist/group graph tables
db.delete(groupGroupMembersTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(artistGroupsTable).run();
db.delete(groupAltNamesTable).run();
db.delete(artistAltNamesTable).run();
db.delete(animeTable).run();
// songs referenced by anime_song_links; delete after links wiped
db.delete(songsTable).run();
// groups referenced by graph tables
db.delete(groupsTable).run();
// artists referenced by graph tables
db.delete(artistsTable).run();
}
// 2a) Insert artists (core)
{
const artistRows = artists.map((a) => ({
songArtistId: a.songArtistId,
name: a.name,
}));
for (const batch of chunk(artistRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistsTable)
.values(batch)
.onConflictDoUpdate({
target: artistsTable.songArtistId,
set: {
name: artistsTable.name,
},
})
.run();
}
}
// 2a.1) Insert groups (core)
{
const groupRows = groups.map((g) => ({
songGroupId: g.songGroupId,
name: g.name,
}));
for (const batch of chunk(groupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupsTable)
.values(batch)
.onConflictDoUpdate({
target: groupsTable.songGroupId,
set: {
name: groupsTable.name,
},
})
.run();
}
}
// 2a.2) Insert memberships + alt names
// For these join tables, easiest/most consistent is replace-all (delete then insert),
// since theyre derived arrays and can change over time.
{
db.delete(artistGroupsTable).run();
db.delete(groupArtistMembersTable).run();
db.delete(groupGroupMembersTable).run();
db.delete(artistAltNamesTable).run();
db.delete(groupAltNamesTable).run();
// artist -> groups
const artistGroupRows = artists.flatMap((a) =>
a.inGroups.map((songGroupId) => ({
songArtistId: a.songArtistId,
songGroupId,
})),
);
for (const batch of chunk(artistGroupRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistGroupsTable).values(batch).run();
}
// group -> artist members
const groupArtistMemberRows = groups.flatMap((g) =>
g.artistMembers.map((songArtistId) => ({
songGroupId: g.songGroupId,
songArtistId,
})),
);
for (const batch of chunk(groupArtistMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupArtistMembersTable).values(batch).run();
}
// group -> group members
const groupGroupMemberRows = groups.flatMap((g) =>
g.groupMembers.map((memberSongGroupId) => ({
songGroupId: g.songGroupId,
memberSongGroupId,
})),
);
for (const batch of chunk(groupGroupMemberRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupGroupMembersTable).values(batch).run();
}
// artist alt names: { songArtistId, name } (stored as altSongArtistId in DB)
const artistAltNameRows = artists.flatMap((a) =>
a.altNames.map((alt) => ({
songArtistId: a.songArtistId,
altSongArtistId: alt.songArtistId,
name: alt.name,
})),
);
for (const batch of chunk(artistAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(artistAltNamesTable).values(batch).run();
}
// group alt names: { songGroupId, name } where object.songGroupId is the context
const groupAltNameRows = groups.flatMap((g) =>
g.altNames.map((alt) => ({
songGroupId: g.songGroupId,
contextSongGroupId: alt.songGroupId,
name: alt.name,
})),
);
for (const batch of chunk(groupAltNameRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(groupAltNamesTable).values(batch).run();
}
}
// 2b) Insert songs
{
const songRows = songs.map((s) => ({
annSongId: s.annSongId,
songId: s.songId,
name: s.name,
category: s.category,
}));
for (const batch of chunk(songRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(songsTable)
.values(batch)
.onConflictDoUpdate({
target: songsTable.annSongId,
set: {
songId: songsTable.songId,
name: songsTable.name,
category: songsTable.category,
},
})
.run();
}
}
// 2c) Insert anime and its normalized children
{
const animeRows = anime.map((a) => ({
annId: a.annId,
aniListId: a.aniListId,
malId: a.malId,
kitsuId: a.kitsuId,
categoryName: a.category.name,
categoryNumber: categoryNumberToText(a.category.number),
mainName: a.mainName,
mainNameEn: a.mainNames.EN,
mainNameJa: a.mainNames.JA,
year: a.year,
seasonId: a.seasonId,
opCount: a.opCount,
edCount: a.edCount,
insertCount: a.insertCount,
}));
for (const batch of chunk(animeRows, batchSize)) {
if (batch.length === 0) continue;
db.insert(animeTable)
.values(batch)
.onConflictDoUpdate({
target: animeTable.annId,
set: {
aniListId: animeTable.aniListId,
malId: animeTable.malId,
kitsuId: animeTable.kitsuId,
categoryName: animeTable.categoryName,
categoryNumber: animeTable.categoryNumber,
mainName: animeTable.mainName,
mainNameEn: animeTable.mainNameEn,
mainNameJa: animeTable.mainNameJa,
year: animeTable.year,
seasonId: animeTable.seasonId,
opCount: animeTable.opCount,
edCount: animeTable.edCount,
insertCount: animeTable.insertCount,
},
})
.run();
}
// For child tables, simplest is: delete existing for these annIds then insert fresh.
// (Safer than trying to upsert composite unique constraints for each child row.)
for (const a of anime) {
db.delete(animeNamesTable)
.where(eq(animeNamesTable.annId, a.annId))
.run();
db.delete(animeGenresTable)
.where(eq(animeGenresTable.annId, a.annId))
.run();
db.delete(animeTagsTable)
.where(eq(animeTagsTable.annId, a.annId))
.run();
db.delete(animeSongLinksTable)
.where(eq(animeSongLinksTable.annId, a.annId))
.run();
// names
if (a.names.length) {
db.insert(animeNamesTable)
.values(
a.names.map((n) => ({
annId: a.annId,
language: n.language,
name: n.name,
})),
)
.run();
}
// genres
if (a.genres.length) {
// Ensure lookup rows exist (string PK)
db.insert(genresTable)
.values(a.genres.map((g) => ({ name: g })))
.onConflictDoNothing()
.run();
// Insert relations
db.insert(animeGenresTable)
.values(
a.genres.map((g) => ({
annId: a.annId,
genreName: g,
})),
)
.run();
}
// tags
if (a.tags.length) {
// Ensure lookup rows exist (string PK)
db.insert(tagsTable)
.values(a.tags.map((t) => ({ name: t })))
.onConflictDoNothing()
.run();
// Insert relations
db.insert(animeTagsTable)
.values(
a.tags.map((t) => ({
annId: a.annId,
tagName: t,
})),
)
.run();
}
// song links
if (a.songLinks.length) {
db.insert(animeSongLinksTable)
.values(
a.songLinks.map((l) => ({
annId: a.annId,
annSongId: l.annSongId,
type: l.type,
number: l.number,
uploaded: l.uploaded,
rebroadcast: l.rebroadcast,
dub: l.dub,
})),
)
.run();
}
}
}
});
}
/**
 * Convenience CLI entrypoint:
 * bun run src/lib/db/import-amq.ts
 *
 * Flags: --wipe (delete existing AMQ rows first), --quiet (suppress progress)
 *
 * Requires:
 * - DB_FILE_NAME set in env (see drizzle.config.ts and src/lib/db/index.ts)
 */
if (import.meta.main) {
  const args = process.argv;
  await importAmqData({
    wipeFirst: args.includes("--wipe"),
    verbose: !args.includes("--quiet"),
  });
  // eslint-disable-next-line no-console
  console.info("AMQ import complete");
}