Skip to content

Commit

Permalink
feat: Use weighted sampler for random course-name generation
Browse files Browse the repository at this point in the history
  • Loading branch information
PkuCuipy committed Jun 30, 2024
1 parent 59f13b2 commit d83a19f
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 20 deletions.
11 changes: 11 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"private": true,
"homepage": ".",
"dependencies": {
"@keystonehq/alias-sampling": "^0.1.2",
"@testing-library/jest-dom": "^5.16.4",
"@testing-library/react": "^13.3.0",
"@testing-library/user-event": "^13.5.0",
Expand Down
166 changes: 166 additions & 0 deletions src/utils/course-name-sampler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import sample from '@keystonehq/alias-sampling';
// Doc: https://www.npmjs.com/package/@keystonehq/alias-sampling

// Candidate prefixes for a generated course name.
// Each entry is a [text, weight] pair. Weights are relative, not probabilities:
// getProbs() divides every weight by the sum of all weights in this table.
const prefixes = [
["西方", 1],
["现代", 1],
["近代", 1],
["高级", 1],
["宏观", 1],
["学术", 1],
["高等", 1],
["计算", 1],
["工程", 1],
["大学", 1],
["应用", 1],
["公共", 1],
["普通", 1],
["实用", 1],
["国际", 0.5],
["美国", 0.05],
["英美", 0.05],
["欧洲", 0.05],
["新时代", 0.2],
["中国特色", 0.2],
["马克思主义", 0.1],
["量子", 0.2],
// The empty string stands for "no prefix"; it carries the largest single weight.
["", 11],
];

// Candidate subject/concept words that form the core of a generated course name.
// Each entry is a [text, weight] pair; weights are relative and are normalized
// by getProbs() (each weight divided by the sum of all weights in this table).
const concepts = [
["数学", 1],
["语言", 1],
["外国语", 1],
["物理", 1],
["化学", 1],
["生物", 1],
["医学", 1],
["计算", 1],
["信息", 1],
["电子", 1],
["法学", 1],
["哲学", 1],
["心理", 0.5],
["心理学", 0.5],
["社会", 0.5],
["社会学", 0.5],
["毒理学", 0.2],
["伦理学", 0.2],
["传播", 1],
["新闻", 1],
["历史", 1],
["考古", 1],
["摄影", 1],
["运筹", 1],
["天体", 0.5],
["地理", 1],
["光学", 1],
["系统", 1],
["机械", 1],
["电磁学", 0.5],
["农学", 0.5],
["游戏设计", 0.5],
["戏曲", 1],
["音乐", 1],
["太极拳", 1],
["生殖", 0.5],
["航空", 0.5],
["航天", 0.5],
["健美", 1],
["环境", 1],
["美术", 1],
["文化", 1],
["地震", 0.2],
["古生物", 0.3],
["统计", 1],
["数论", 1],
["逻辑学", 1],
["偏微分方程", 0.3],
["谢慧民", 0.1],
["服装", 0.2],
["拓扑", 1],
["方程", 1],
["代数", 1],
["数理", 1],
["力学", 1],
["几何", 1],
["计算机", 1],
["嵌入式", 0.5],
["机器学习", 0.5],
["区块链", 0.5],
["图像处理", 0.3],
["网络", 0.3],
["硬件", 0.5],
["数据挖掘", 0.3],
["算法", 0.5],
["安全", 0.5],
["法语", 0.2],
["德语", 0.2],
["日语", 0.2],
["文学", 1],
["经济学", 1],
["文献", 1],
["发展", 0.3],
["道德", 0.5],
["法治", 1],
["半导体", 0.5],
["电影", 0.5],
["媒体", 0.5],
["新媒体", 0.5],
["《庄子》", 0.1],
["《理想国》", 0.1],
["《红楼梦》", 0.1],
]

// Candidate connectives, as [text, weight] pairs with relative weights
// (normalized by getProbs()).
// NOTE(review): presumably placed between two concept words by the caller — confirm
// against the course-name generator that consumes connective_sampler.
const connectives = [
["与", 1],
["中的", 1],
// Empty string: no connective text. Its weight equals the two entries above combined.
["", 2],
]

// Candidate suffixes for a generated course name.
// Each entry is a [text, weight] pair; weights are relative and are normalized
// by getProbs() (each weight divided by the sum of all weights in this table).
const suffixes = [
["概论", 1],
["基础", 1],
["导论", 1],
["实践", 1],
["实习", 1],
["(A)", 1],
["(B)", 2],
["(C)", 0.5],
["(实验班)", 1],
["(上)", 1],
["(下)", 1],
["理论", 1],
["设计", 1],
["分析", 1],
["方法", 1],
["研讨班", 1],
["习题课", 1],
["通识", 1],
["实验", 1],
["研究", 1],
["原理", 1],
["赏析", 0.5],
["史", 0.5],
["专题", 0.5],
["精读", 0.5],
["选读", 0.5],
["设计与实现", 0.5],
["听说", 0.5],
["前沿", 0.5],
["入门", 0.3],
["初步", 0.2],
["上机", 0.2],
// The empty string stands for "no suffix".
["", 2],
];

// Collect the first item (the text) of every [text, weight] pair, preserving order.
const getElems = (arr) => {
  const texts = [];
  for (const pair of arr) {
    texts.push(pair[0]);
  }
  return texts;
};
// Turn the relative weights of a [text, weight] table into fractions of the
// total weight, returned in the same order as the input entries.
const getProbs = (arr) => {
  // Accumulate left to right starting from 0 so the floating-point total is
  // built in entry order.
  let total = 0;
  for (const pair of arr) {
    total = total + pair[1];
  }
  const fractions = arr.map((pair) => pair[1] / total);
  return fractions;
};

// One weighted sampler per word table, built once at module load.
// Each call passes the normalized probabilities first and the matching texts
// second, both in the table's own order.
// NOTE(review): the exact contract of sample() (argument order, the `.next()`
// draw method) comes from @keystonehq/alias-sampling — confirm against its docs.
export const prefix_sampler = sample(getProbs(prefixes), getElems(prefixes));
export const concept_sampler = sample(getProbs(concepts), getElems(concepts));
export const connective_sampler = sample(getProbs(connectives), getElems(connectives));
export const suffix_sampler = sample(getProbs(suffixes), getElems(suffixes));
27 changes: 7 additions & 20 deletions src/utils/random-generate.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { nextUniqueId } from "./miscs";
import { prefix_sampler, concept_sampler, connective_sampler, suffix_sampler } from "./course-name-sampler";

// 随机生成整数 (min 和 max 都是 inclusive 的!)
export function randint(min, max) {
Expand All @@ -8,7 +9,7 @@ export function randint(min, max) {
}

// 列表中随机选一个元素
export function random_choice(list) { // TODO: change to weighted random choice
export function random_choice(list) {
return list[randint(0, list.length - 1)];
}

Expand All @@ -19,27 +20,13 @@ export function random_credit() {

// 随机生成课程名
export function random_course_name() {
const prefixes = ["西方", "现代", "近代", "高级", "宏观", "学术", "高等", "计算", "工程", "大学", "应用", "公共",
"普通", "国际", "新时代", "量子", "实用", "", "", "", "", "", "", "", "", "", "", ""];
const concepts = ["数学", "语言", "外国语", "物理", "化学", "生物", "医学", "计算", "信息", "电子",
"法学", "哲学", "心理", "社会", "传播", "新闻", "历史", "考古", "摄影",
"运筹", "天体", "地理", "光学", "系统", "机械", "电磁学", "农学", "游戏设计", "戏曲",
"音乐", "太极拳", "生殖", "航空", "航天", "健美", "环境", "美术", "文化", "统计",
"力学", "几何", "嵌入式", "硬件", "法语", "德语", "文学", "逻辑", "计算机", "数论",
"偏微分", "方程", "代数", "数理", "网络", "机器学习", "图像处理", "经济",
"文献", "发展", "道德", "法治", "数据", "半导体", "电影", "算法", "媒体",]
const connectives = ["与", "中的", "", ""]
const suffixes = ["概论", "基础", "导论", "实践", "(A)", "(B)", "(C)", "(实验班)", "(上)", "(下)", "理论",
"设计", "分析", "方法", "研讨班", "通识", "实验", "研究", "原理", "赏析", "实习", "史",
"专题", "精读", "设计与实现", "习题", "听说", "前沿", "", ""];

// 生成模式: prefix concept (connective concept)? suffix
let prefix = random_choice(prefixes);
let concept = random_choice(concepts);
let prefix = prefix_sampler.next();
let concept = concept_sampler.next();
let have_second = random_choice([true, false]);
let connective = have_second ? random_choice(connectives) : "";
let concept2 = have_second ? random_choice(concepts) : "";
let suffix = random_choice(suffixes);
let connective = have_second ? connective_sampler.next() : "";
let concept2 = have_second ? concept_sampler.next() : "";
let suffix = suffix_sampler.next();
return "".concat(prefix, concept, connective, concept2, suffix);
}

Expand Down

0 comments on commit d83a19f

Please sign in to comment.