D2RS-2026spring 数据分析
  • 首页
  • 课程导论
  • 技术工具
  • Issue #1 分析
  • Issue #2 分析
  • 成员名单
  • 结课项目
  • 检查作业

On this page

  • 说明
  • 不合规申请名单

Issue #1 - 申请加入成员名单

Published

April 3, 2026

说明

以下名单来自 Issue #1 的回复,数据实时从 GitHub API 拉取。

共收到 314 条申请(学号格式合规:311 条,不合规:3 条)。

不合规申请名单

以下学号不符合规范(须以 2025 开头、共 13 位数字),请本人修改后重新在 Issue #1 中回复:

Source Code
---
title: "Issue #1 - 申请加入成员名单"
date: today
format:
  html:
    page-layout: full
editor: visual
---

## 说明

以下名单来自 [Issue #1](https://github.com/D2RS-2026spring/members/issues/1) 的回复,数据实时从 GitHub API 拉取。

```{r}
#| echo: false
#| warning: false
#| message: false

library(gh)
library(dplyr)
library(purrr)
library(stringr)
library(DT)
library(lubridate)

# Let the R `gh` package authenticate with the token held by the GitHub CLI.
# `gh auth token` fails when the CLI is not installed or not logged in; in that
# case GITHUB_PAT is left untouched, so the request below is made with whatever
# credentials (if any) are already configured instead of aborting the render
# at this line.
cli_out <- tryCatch(
  suppressWarnings(
    system("gh auth token", intern = TRUE, ignore.stderr = TRUE)
  ),
  error = function(e) character(0)
)
# A non-zero exit status is reported through the "status" attribute.
cli_failed <- !is.null(attr(cli_out, "status"))
cli_token  <- trimws(cli_out)
cli_token  <- cli_token[nzchar(cli_token)]
if (!cli_failed && length(cli_token) > 0) {
  Sys.setenv(GITHUB_PAT = cli_token[[1]])
}

# Configuration: the organisation, repository and issue that collect the
# membership applications.
ORG_NAME   <- "D2RS-2026spring"
REPO_NAME  <- "members"
ISSUE_NUM  <- 1

# Fetch every comment of the issue. `.limit = Inf` asks gh to keep requesting
# pages (100 comments per request) until all comments have been retrieved.
comments <- gh::gh(
  "/repos/{owner}/{repo}/issues/{issue_number}/comments",
  owner        = ORG_NAME,
  repo         = REPO_NAME,
  issue_number = ISSUE_NUM,
  per_page     = 100,
  .limit       = Inf
)

# Parse one issue comment into a one-row data frame of student information.
#
# `comment` is one element of the list returned by the issue-comments request;
# the fields read are `body`, `user$login`, `html_url` and `created_at`.
#
# Returns a data frame with the columns student_id, name, interest, login,
# html_url and created_at. student_id, name and interest are NA when the
# corresponding field cannot be found in the comment body.
parse_student <- function(comment) {
  body <- comment$body
  # A missing body would make the pattern matching below return zero-length
  # results; treat it as an empty comment instead.
  if (is.null(body) || is.na(body)) {
    body <- ""
  }
  login    <- comment$user$login
  html_url <- comment$html_url
  created  <- comment$created_at

  # Field labels may be followed by an ASCII colon or by the full-width colon
  # (U+FF1A) produced by Chinese input methods.
  colon <- "[:\\uff1a]"

  # Student ID: prefer a labelled "学号: <digits>" field; otherwise accept a
  # comment that starts directly with the digits. The capture group returns
  # the digit run exactly as written, including a leading 0 or 5.
  student_id <- str_match(
    body,
    paste0("学号", colon, "\\s*(\\d{10,13})")
  )[, 2]
  if (is.na(student_id)) {
    student_id <- str_extract(body, "^\\d{10,13}")
  }

  # Name: two to four Chinese characters after the "姓名" label.
  name <- str_match(
    body,
    paste0("姓名", colon, "\\s*([\\u4e00-\\u9fa5]{2,4})")
  )[, 2]

  # Field of interest: the first run of Chinese characters, ASCII letters or
  # digits after the "感兴趣…" label (the run ends at the first other
  # character, e.g. punctuation or whitespace).
  interest <- str_match(
    body,
    paste0("感兴趣[的方向]*", colon, "\\s*([\\u4e00-\\u9fa5a-zA-Z0-9]+)")
  )[, 2]

  data.frame(
    student_id = student_id,
    name       = name,
    interest   = interest,
    login      = login,
    html_url   = html_url,
    created_at = created,
    stringsAsFactors = FALSE
  )
}

# Parse every comment and stack the one-row results into a single table.
student_df <- map(comments, parse_student) |> bind_rows()

# With no comments at all, bind_rows() yields a table without any columns and
# the filter() below would fail on the missing `student_id`; start from an
# empty table with the expected columns instead.
if (ncol(student_df) == 0) {
  student_df <- data.frame(
    student_id = character(),
    name       = character(),
    interest   = character(),
    login      = character(),
    html_url   = character(),
    created_at = character(),
    stringsAsFactors = FALSE
  )
}

# Keep only the records from which a student ID could be parsed, and flag the
# IDs that follow the required format: "2025" followed by nine more digits.
student_df <- student_df |>
  filter(!is.na(student_id)) |>
  mutate(
    student_id = str_trim(student_id),
    valid_id   = str_detect(student_id, "^2025\\d{9}$")
  )

# Mask student IDs: characters 5 to 9 (five characters) are replaced by
# "*****".
#
# `id` may be a single ID or a vector of IDs. Elements that are NA or shorter
# than nine characters are returned unchanged.
mask_id <- function(id) {
  maskable <- !is.na(id) & nchar(id) >= 9
  if (any(maskable)) {
    shown <- id[maskable]
    id[maskable] <- paste0(
      substr(shown, 1, 4),
      "*****",
      substr(shown, 10, nchar(shown))
    )
  }
  id
}

# Build the table shown on the page: one row per application in chronological
# order, with the GitHub account and the application time rendered as HTML
# links. Student IDs are masked with mask_id() before publication; it is
# applied element by element, so it only needs to handle a single ID.
student_display <- student_df |>
  arrange(created_at) |>
  mutate(
    序号         = row_number(),
    学号         = vapply(student_id, mask_id, character(1), USE.NAMES = FALSE),
    姓名         = ifelse(is.na(name), "—", name),
    感兴趣方向   = ifelse(is.na(interest) | interest %in% c("无", "NA"), "—", interest),
    GitHub账号   = sprintf('<a href="https://github.com/%s" target="_blank">%s</a>', login, login),
    # The timestamp is displayed in the Asia/Shanghai time zone and links to
    # the comment itself.
    申请时间     = sprintf(
      '<a href="%s" target="_blank">%s</a>',
      html_url,
      format(ymd_hms(created_at, tz = "Asia/Shanghai"), "%Y-%m-%d %H:%M")
    )
  ) |>
  select(序号, 学号, 姓名, 感兴趣方向, GitHub账号, 申请时间)

# Summary counts used in the sentence below the chunk.
total       <- nrow(student_display)
valid_cnt   <- sum(student_df$valid_id)
invalid_cnt <- total - valid_cnt

# Applications whose student ID fails the format check. They are listed
# further down the page with the ID exactly as submitted (not masked) so that
# it can be verified, together with a short explanation of the problem.
invalid_df <- student_df |>
  filter(!valid_id) |>
  arrange(created_at) |>
  mutate(
    原始学号   = student_id,
    姓名       = replace(name, is.na(name), "—"),
    # The first matching rule decides the explanation.
    问题说明   = case_when(
      is.na(student_id)                 ~ "学号未填写",
      !str_detect(student_id, "^\\d+$") ~ "含非数字字符",
      nchar(student_id) != 13           ~ paste0("位数不对(", nchar(student_id), "位,应为13位)"),
      !str_detect(student_id, "^2025")  ~ "不以2025开头",
      TRUE                              ~ "格式不符合规范"
    ),
    GitHub账号 = sprintf(
      '<a href="https://github.com/%s" target="_blank">%s</a>',
      login,
      login
    ),
    # Timestamp shown in the Asia/Shanghai time zone, linking to the comment.
    申请时间   = sprintf(
      '<a href="%s" target="_blank">%s</a>',
      html_url,
      format(ymd_hms(created_at, tz = "Asia/Shanghai"), "%Y-%m-%d %H:%M")
    )
  ) |>
  select(原始学号, 姓名, 问题说明, GitHub账号, 申请时间)
```

共收到 **`r total`** 条申请(学号格式合规:**`r valid_cnt`** 条,不合规:**`r invalid_cnt`** 条)。

```{r}
#| echo: false
# Interactive table of all applications.

# Chinese labels for the table controls.
dt_language <- list(
  search      = "搜索:",
  lengthMenu  = "每页显示 _MENU_ 条",
  info        = "第 _START_ 至 _END_ 条,共 _TOTAL_ 条",
  paginate    = list(previous = "上一页", `next` = "下一页")
)

# Column indices are zero-based: centre four of the columns and fix the width
# of columns 1 and 5.
dt_column_defs <- list(
  list(className = "dt-center", targets = c(0, 1, 4, 5)),
  list(width = "120px", targets = 1),
  list(width = "150px", targets = 5)
)

# Caption displayed below the table, right-aligned and greyed out.
dt_caption <- htmltools::tags$caption(
  style = "caption-side: bottom; text-align: right; color: #888;",
  "数据来源:GitHub Issue #1 评论,实时获取"
)

# `escape = FALSE` lets the HTML links in the table cells render as links.
datatable(
  student_display,
  escape   = FALSE,
  rownames = FALSE,
  filter   = "top",
  options  = list(
    pageLength = 20,
    lengthMenu = c(10, 20, 50, 100),
    language   = dt_language,
    columnDefs = dt_column_defs
  ),
  caption  = dt_caption
)
```

## 不合规申请名单

以下学号不符合规范(须以 `2025` 开头、共 13 位数字),请本人修改后重新在 Issue #1 中回复:

```{r}
#| echo: false
# Show the non-compliant applications as an interactive table, or a
# confirmation message when there are none.
if (nrow(invalid_df) > 0) {
  # Chinese labels for the table controls.
  invalid_language <- list(
    search     = "搜索:",
    lengthMenu = "每页显示 _MENU_ 条",
    info       = "第 _START_ 至 _END_ 条,共 _TOTAL_ 条",
    paginate   = list(previous = "上一页", `next` = "下一页")
  )

  # Caption displayed below the table, right-aligned and greyed out.
  invalid_caption <- htmltools::tags$caption(
    style = "caption-side: bottom; text-align: right; color: #888;",
    "请核查后在 Issue #1 中重新提交正确学号"
  )

  # `escape = FALSE` lets the HTML links in the table cells render as links.
  datatable(
    invalid_df,
    escape   = FALSE,
    rownames = FALSE,
    options  = list(
      pageLength = 20,
      language   = invalid_language,
      # Column indices are zero-based: centre columns 2, 3 and 4.
      columnDefs = list(
        list(className = "dt-center", targets = c(2, 3, 4))
      )
    ),
    caption  = invalid_caption
  )
} else {
  cat("✅ 所有申请的学号均符合规范!")
}
```