## Script for downloading Zip files of adjusted daily historical prices from ShareInvestor website
## No need for RSelenium as this is not scraping from javascript-based webpage
## NOTE: the former `rm(list=ls())` call was dropped on purpose. It wiped the caller's
## entire global environment whenever this script was source()d from an interactive
## session, and it is not needed: every variable this script uses is assigned before use.
library(rvest)
library(xlsx) # needed to read from Excel file
library(stringr) # needed to do some padding in string operations
library(dplyr)
## Part 01 - Logging into ShareInvestor member page
## The site requires a login, so the same authenticated session must be kept and
## reused for downloading the files further down.
login_url = "https://www.shareinvestor.com/user/login.html"
session = html_session(login_url)
## The login form is the 2nd form of the page/url
form = html_form(session)[[2]]
## The form has no submit button - inject a fake submit button or else rvest cannot submit.
fake_submit_button = list(name = NULL
                          , type = "submit"
                          , value = NULL
                          , checked = NULL
                          , disabled = NULL
                          , readonly = NULL
                          , required = FALSE)
attr(fake_submit_button, "class") = "input"
form[["fields"]][["submit"]] = fake_submit_button
## Credentials are taken from environment variables when set, so real ones do not have
## to be written into this file; the original placeholder literals remain the fallback.
login_name = Sys.getenv("SHAREINVESTOR_USER", unset = 'username')
login_password = Sys.getenv("SHAREINVESTOR_PASSWORD", unset = 'password')
## NOTE(review): `name` and `password` must match the input names of the login form - verify against the page.
filled_form = set_values(form
                         , name = login_name
                         , password = login_password)
submit_form(session, filled_form) # Submit form to authenticate the current session
## Part 02 - Downloading historical price zip files
## Zip files are saved to the "./Price Data - hist/" subfolder
## Load list of stock codes to download (3rd column of the first sheet)
scode_list = as.matrix(read.xlsx(file="./s_code_all.xlsx", sheetIndex=1, startRow=1)[,3])
scode_list = unique(scode_list[which(scode_list!="FALSE")])
scode_list = str_pad(scode_list, width=4, side="left", pad="0")
## Limit the list of stock to download by excluding already downloaded files (based on size)
## I had to do this because there's a login time out before I can complete all downloads
## The stock code is recovered from the bare file name ("hist_<code>.zip"), so the result
## does not depend on how list.files() spells the directory prefix (e.g. a doubled "/").
dld_list = file.info(list.files(path="./Price Data - hist/", full.names=TRUE)) %>%
  mutate(sname=sub("^hist_(.*)\\.zip$", "\\1", basename(rownames(.)))) %>%
  filter(size>100) %>% # files of 100 bytes or less are treated as not downloaded
  select(sname)
xdld_list = scode_list[!(scode_list %in% dld_list$sname)]
## Further limit to at most 20 stocks per run. head() is used instead of [1:20] because
## indexing past the end pads the vector with NA, which would then be requested as "NA.MY".
todl_list = head(xdld_list, 20)
## Fetch one zip file per stock code through the logged-in session and save it to disk.
for(scode in todl_list){
  download_url = paste0("http://www.shareinvestor.com/prices/price_download_zip_file.zip?type=historical&counter="
                        , scode
                        , ".MY")
  zipfile = jump_to(session, download_url)
  ## Do not save the body of a failed request: an error page larger than 100 bytes
  ## would otherwise pass the size filter and be counted as downloaded on the next run.
  if(zipfile$response$status_code >= 400){
    warning("Download failed for ", scode, " (HTTP ", zipfile$response$status_code, ")", call. = FALSE)
  } else {
    writeBin(zipfile$response$content
             # , paste0("./Price Data/hist_", scode, format(Sys.Date(), "_%Y%m%d"), ".zip") )
             , paste0("./Price Data - hist/hist_", scode, ".zip") )
  }
  Sys.sleep(2) # pause between requests
}
# download.file(download_url, "./file.zip", method = "curl")