import * as pinyin from 'pinyin-utils'

/**
 * Lookup table of parsed dictionary entries, grouped by their `traditional`
 * field.
 */
export class Dict {
    /**
     * Builds the index from raw dictionary text.
     *
     * The text is split on '\n' and every line is handed to `parseLine`;
     * lines for which it returns a falsy value are ignored.
     *
     * @param {string} data - Full text of the dictionary file.
     */
    constructor(data) {
        const index = new Map();
        for (const rawLine of data.split('\n')) {
            const entry = parseLine(rawLine);
            if (!entry) {
                continue;
            }
            const bucket = index.get(entry.traditional);
            if (bucket === undefined) {
                // First entry seen for this headword: start a new group.
                index.set(entry.traditional, [entry]);
            } else {
                bucket.push(entry);
            }
        }
        this.traditionalMap = index;
    }

    /**
     * Looks up every entry whose `traditional` field equals `query`.
     *
     * @param {string} query - Headword to look up.
     * @returns {Array<object>|undefined} Matching entries in file order, or
     *     `undefined` when the headword is not present.
     */
    traditional(query) {
        return this.traditionalMap.get(query);
    }
}

/**
 * Fetches the dictionary file from '/data/cedict_ts.u8' and builds a `Dict`
 * from its text.
 *
 * @returns {Promise<Dict>} Resolves to the populated dictionary.
 * @throws {Error} If the server answers with a non-2xx status. Rejections
 *     raised by `fetch` itself (e.g. network failure) propagate unchanged.
 */
export async function loadDict() {
    const response = await fetch('/data/cedict_ts.u8');
    // fetch() only rejects on network failure; without this check an HTTP
    // error page would be parsed as dictionary text and yield an empty Dict.
    if (!response.ok) {
        throw new Error(
            `Failed to load dictionary: ${response.status} ${response.statusText}`,
        );
    }
    const text = await response.text();
    return new Dict(text);
}

/**
 * Parses a single dictionary line of the form
 * `<traditional> <simplified> [<pinyin syllables>] /<def 1>/<def 2>/`.
 *
 * Lines starting with '#' or '%' (after trimming) and lines that do not match
 * the format produce no result.
 *
 * @param {string} line - One raw line of the dictionary file.
 * @returns {{traditional: string, simplified: string, pronunciation: string,
 *     definition: string[]}|undefined} The parsed entry, or `undefined` when
 *     the line is skipped.
 */
function parseLine(line) {
    const text = line.trim();
    const isSkipped = text.startsWith('#') || text.startsWith('%');
    if (isSkipped) {
        return;
    }
    const parsed = /^(.+?) (.+?) \[(.+?)\] \/(.+)\/$/.exec(text);
    if (parsed === null) {
        return;
    }
    const [, traditional, simplified, rawPinyin, rawDefinitions] = parsed;
    // Each space-separated syllable goes through pinyin.numberToMark
    // (presumably tone number -> tone mark; see the pinyin-utils docs).
    const syllables = rawPinyin.split(' ');
    return {
        traditional,
        simplified,
        pronunciation: syllables.map(pinyin.numberToMark).join(' '),
        definition: rawDefinitions.split('/'),
    };
}