跳转到内容

Talk:2020年中国围棋甲级联赛

页面内容不支持其他语言。
维基百科,自由的百科全书

将新浪棋牌比赛结果转换为维基格式的R程序(作者Robin Lu):

# Install libraries

# rvest provides the HTML scraping helpers (and the %>% pipe); readr provides
# write_file().
library(rvest)
library(readr)

# ASSUMPTIONS:
# - number of rows in a table (5 - header and 4 rows)
# - main text of website is in div whose id is artibody
# - title of article is of class main-title
# - date is followed by \UFF0C (fullwidth comma)
# - 台次 is surrounded by \UFF08 (fullwidth left parenthesis) and \UFF09 (fullwidth right parenthesis)
# - each row with match info is preceded by 3 characters: "#台:"
# Input

# Deliberate guard: halts the script with a reminder to edit the two inputs
# below before running it.
stop("Change inputs")
# Address of the Sina article holding the match results.
theurl <- "https://sports.sina.com.cn/go/2020-12-13/doc-iiznctke6296498.shtml"
# File that receives the generated wiki markup.
resultFile <- "test10.txt"


# Harvest website

# Download and parse the article page.
website <- read_html(theurl)

# Text of every <p> that follows a <div> sibling inside #artibody, with
# leading/trailing horizontal and vertical whitespace removed.
par <- trimws(
  html_text(html_nodes(website, "#artibody div ~ p")),
  which = "both",
  whitespace = "[\\h\\v]"
)

# Text of the element(s) carrying the main-title class.
artTitle <- html_text(html_nodes(website, ".main-title"))

# Keep only the table paragraphs: drop the first paragraph (used for the date)
# and the last one.
tableInfo <- par[-c(1, length(par))]
# A fullwidth left parenthesis becomes the wiki cell separator; the fullwidth
# right parenthesis is dropped.
tableInfo <- gsub("\UFF08", "||", tableInfo, fixed = TRUE)
tableInfo <- gsub("\UFF09", "", tableInfo, fixed = TRUE)
# Spaces separate cells as well.
tableInfo <- gsub(" ", "||", tableInfo, fixed = TRUE)

# The date is the text before the first fullwidth comma of the first paragraph.
artDate <- unlist(strsplit(par[1], "\UFF0C"))[1]
# Header line: the date followed by a <ref> footnote citing the source article
# as an external link (URL, then title). The wiki renderer had collapsed this
# tag into a literal "[1]" marker.
header <- paste0(artDate, "<ref>[", theurl, " ", artTitle, "]</ref>\n")

# NOTE(review): the colBegin and colMidBeg string literals below are truncated,
# and colMidEnd and colEnd (both used by the table loop further down) are never
# defined in this listing. Presumably the wiki renderer consumed the table
# markup that was inside these literals -- TODO restore all four values from
# the page's wikitext source before running the script.
# result accumulates the output pieces, starting with the header line; rowSep
# is the wiki table row separator.
result <- c(header)

colBegin <- "

colMidBeg <- "

\n"

rowSep <- "|-\n"

# Number of entries available for the tables.
nObj <- length(tableInfo)

# One output row holds 3 tables of 6 entries each. Count rows from tableInfo
# rather than par: par still contains the first and last paragraphs that were
# trimmed away, and those two extra entries could round the count up to an
# additional, empty row.
nColObjs <- ceiling(nObj / 6 / 3)

# loop for each row of 3 tables

# Build the output one row at a time. Each row holds up to 3 tables, and each
# table starts 6 entries after the previous one in tableInfo: a title entry
# followed by 4 board entries (the 6th entry is not read here).
# seq_len() is used instead of 1:nColObjs so that zero rows means zero
# iterations rather than iterating over c(1, 0).
for (i in seq_len(nColObjs)) {
  currCol <- colBegin
  # loop for each table in row
  for (j in 1:3) {
    start <- ((i - 1) * 18) + ((j - 1) * 6) + 1
    if (start <= nObj) {
      # Title row: the "||" cell separators become "!!" header separators,
      # and a trailing board-number column heading is appended.
      currCol <- paste0(
        currCol, colMidBeg,
        "!", gsub("||", "!!", tableInfo[start], fixed = TRUE), "!!台次\n"
      )
      # Board rows: only visit indices that exist, so a truncated final table
      # does not produce "|NA" rows.
      boardIdx <- start + 1:4
      boardIdx <- boardIdx[boardIdx <= nObj]
      for (k in boardIdx) {
        board <- tableInfo[k]
        # Skip the first 3 characters of the entry (the board prefix).
        currCol <- paste0(
          currCol, rowSep,
          "|", substr(board, 4, nchar(board)), "\n"
        )
      }
      currCol <- paste0(currCol, colMidEnd)
    }
  }
  currCol <- paste0(currCol, colEnd)
  result <- c(result, currCol)
}

# Join all pieces into a single string and write it to the result file.
toprint <- paste(result, collapse = "")
write_file(toprint, resultFile)

# Wiki signature: the preceding unsigned comment was added by Kelu
# (talk | contribs) at 14:01 (UTC) on Friday, 1 January 2021. [reply]

  1. ^ [", theurl, " ", artTitle, "]