“Web-Scraping mit R Selenium” Code-Antworten

Web-Scraping mit R Selenium

# Load the CSV of ZIP codes that will be looked up.
zips.df <- read.csv(file = "zip_code_data.csv")

# Start a Selenium-driven Firefox session on port 4557 and keep a handle to
# its client object; every later browser interaction goes through `remDr`.
rD <- rsDriver(browser = "firefox", port = 4557L)
remDr <- rD[["client"]]

# Open the FCC DTV reception maps page in the controlled browser.
remDr$navigate("https://www.fcc.gov/media/engineering/dtvmaps")

#' Scrape the station table for one ZIP code from the FCC DTV maps page.
#'
#' Uses the global Selenium client `remDr`, which must already show a page
#' containing the `startpoint` input and the `btnSub` button.
#'
#' @param zip ZIP code to type into the `startpoint` field.
#' @return A data frame with the columns `callsign`, `network`, `ch_num`,
#'   `band`, `strength` and `cont.strength`. When the site answers with an
#'   alert window, a single all-`NA` row is returned instead.
scrape.zips <- function(zip) {
  # Start from an empty field so text left behind by an earlier, failed lookup
  # cannot be concatenated with this ZIP code.
  remDr$findElement("id", "startpoint")$clearElement()
  remDr$findElement("id", "startpoint")$sendKeysToElement(list(zip))
  remDr$findElements("id", "btnSub")[[1]]$clickElement()

  # Asking for the alert text fails when no alert window is open, so a
  # successful call means the site raised an alert for this input.
  alert <- try(remDr$getAlertText(), silent = TRUE)

  if (!inherits(alert, "try-error")) {
    # Alert present: record a placeholder row and dismiss the alert. The input
    # field is cleared once, below, for both branches.
    signals <- data.frame(
      callsign = NA, network = NA, ch_num = NA, band = NA,
      strength = NA, cont.strength = NA
    )
    remDr$acceptAlert()
  } else {
    # NOTE(review): presumably waits for the results to render - confirm.
    Sys.sleep(2)

    # Parse the page once and reuse the document for all three extractions.
    page <- read_html(remDr$getPageSource()[[1]])

    cont.strength <- page %>%
      html_nodes(".callsign") %>%
      html_attr("onclick") %>%
      str_extract("(?<=RX Strength: )\\s*\\-*[0-9.]+")

    tables <- html_nodes(page, "table.tbl_mapReception")
    if (length(tables) < 3) {
      stop(
        "expected at least 3 'table.tbl_mapReception' tables, found ",
        length(tables),
        call. = FALSE
      )
    }

    signals <- html_table(tables[[3]], fill = TRUE)
    names(signals) <- c("rm", "callsign", "network", "ch_num", "band", "rm2")

    # slice(-1) drops the first row; slice(2:n()) would count down (2:1) and
    # keep that row whenever the table has only one row.
    signals <- signals %>%
      slice(-1) %>%
      filter(callsign != "") %>%
      select(callsign:band)

    strength <- page %>%
      html_nodes("table.tbl_mapReception:nth-child(3) .ae-img") %>%
      html_attr("src")

    if (length(strength) == 0) {
      strength <- "none"
    }
    if (length(cont.strength) == 0) {
      cont.strength <- "none"
    }

    signals <- cbind(
      signals,
      strength = strength,
      cont.strength = cont.strength
    )
    signals <- mutate(signals, strength = str_extract(strength, "strength."))
  }

  remDr$findElement("id", "startpoint")$clearElement()

  # Random 1-3 second pause between lookups. It has to happen before the
  # value is returned; placed after return() it would never execute.
  Sys.sleep(runif(1, 1, 3))

  signals
}
Bushra

Web-Scraping mit R Selenium

# Scrape every ZIP code: the data is grouped by `zip`, scrape_safe() is called
# once per group with that group's `zip` values, and do() combines the
# returned data frames into the new `zips.df`.
zips.df <- do(group_by(zips.df, zip), scrape_safe(.$zip))
Bushra

Web-Scraping mit R Selenium

# Empty the `startpoint` input so the next entry starts from a blank field.
remDr$findElement(using = "id", value = "startpoint")$clearElement()
Bushra

Web-Scraping mit R Selenium

# Check whether an alert window is open: asking for its text fails when there
# is none, and try() captures that failure instead of stopping the script.
alert <- try(remDr$getAlertText(), silent = TRUE)

# inherits() is the robust class test; comparing class() with `!=` yields a
# vector condition whenever an object carries more than one class.
if (!inherits(alert, "try-error")) {
  # An alert window is present: record an all-NA placeholder row, dismiss the
  # alert and empty the input field.
  signals <- data.frame(
    callsign = NA, network = NA, ch_num = NA, band = NA,
    strength = NA, cont.strength = NA
  )
  remDr$acceptAlert()
  remDr$findElement("id", "startpoint")$clearElement()
} else {
  # No alert: continue on as normal.

  # normal scraping procedure code here

}
Bushra

Web-Scraping mit R Selenium

#' Run `scrape.zips()` for one ZIP code without letting an error stop the run.
#'
#' @param zip ZIP code passed through to `scrape.zips()`.
#' @return The value of `scrape.zips(zip)`, or an empty data frame when that
#'   call raises an error.
scrape_safe <- function(zip) {
  result <- try(scrape.zips(zip))

  # inherits() instead of class(result) == "try-error": a successful result
  # may carry several classes (a tibble has three), which would make the
  # comparison a length > 1 condition - an error in `if` since R 4.2.
  if (inherits(result, "try-error")) {
    cat("Error encountered for zip:", zip, "\n")
    # Pause 1-3 seconds before moving on; this must come before return(),
    # otherwise it is unreachable.
    Sys.sleep(runif(1, 1, 3))
    return(data.frame())
  }

  result
}
Bushra

Web-Scraping mit R Selenium

# Type a five-digit ZIP code into the `startpoint` input field.
zip <- "27511"
remDr$findElement(
  using = "id",
  value = "startpoint"
)$sendKeysToElement(sendKeys = list(zip))
Bushra

Web-Scraping mit R Selenium

# Type an eight-digit value into the `startpoint` field and submit the form.
# NOTE(review): presumably a deliberately invalid ZIP used to provoke the
# site's alert window - confirm.
zip <- "27511111"
remDr$findElement(
  using = "id",
  value = "startpoint"
)$sendKeysToElement(sendKeys = list(zip))
remDr$findElements(using = "id", value = "btnSub")[[1]]$clickElement()
Bushra

Ähnliche Antworten wie “Web-Scraping mit R Selenium”

Fragen ähnlich wie “Web-Scraping mit R Selenium”

Weitere verwandte Antworten zu “Web-Scraping mit R Selenium” auf JavaScript

Durchsuchen Sie beliebte Code-Antworten nach Sprache

Durchsuchen Sie andere Codesprachen