Commit 80e87985 authored by Andrii's avatar Andrii

fix

parent 226d9203
# R files
.Rproj.user
.Rhistory
.RData
.Ruserdata
# data files
data
info
rsconnect
This diff is collapsed.
File deleted
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
#
# select companies by tech
#
library(shinydashboard)
library(RMySQL)
#library(sqldf)
library(feather)
library(ggplot2)
library(dplyr)
library(reshape2)
library(lubridate)
library(plotly)
# 1. Connect to db
# The password is read from the ESI_DB_PASSWORD environment variable so that
# the secret is not stored in version control. User, database name and host
# can be overridden through ESI_DB_USER, ESI_DB_NAME and ESI_DB_HOST; when
# these are unset the previous values are used.
if (!nzchar(Sys.getenv("ESI_DB_PASSWORD"))) {
  stop(
    "Set the ESI_DB_PASSWORD environment variable before running this script.",
    call. = FALSE
  )
}
mydb <- dbConnect(
  MySQL(),
  user = Sys.getenv("ESI_DB_USER", unset = "analyst"),
  password = Sys.getenv("ESI_DB_PASSWORD"),
  dbname = Sys.getenv("ESI_DB_NAME", unset = "esi_management"),
  host = Sys.getenv(
    "ESI_DB_HOST",
    unset = "lecanaldb.c12hbxfn3xzn.eu-west-1.rds.amazonaws.com"
  ),
  port = 3306
)
# 2. List of tables
dbListTables(mydb)
# 3. Load tables
# Load every row of a database table and cache it as a feather file.
#
# Args:
#   table: name of the table to read; also used as the cache file name
#          under "data/".
#
# Returns:
#   The table contents as a data frame.
#
# Uses the global connection `mydb`.
db_load <- function(table) {
  rs <- dbSendQuery(mydb, paste0("select * from ", table))
  # Release the result set even when fetching or writing fails.
  on.exit(dbClearResult(rs), add = TRUE)
  df <- fetch(rs, n = -1)
  # The cache directory may not exist yet (e.g. on a fresh checkout).
  dir.create("data", showWarnings = FALSE)
  write_feather(df, file.path("data", table))
  # Return the data explicitly rather than relying on the value of
  # write_feather(), which callers assign to their df_* variables.
  df
}
# 3.2. Load all tables
# <main>
df_entity <- db_load("wp_esi_entity")
# basic for cross
df_tag <- db_load("wp_esi_tag")
df_technology <- db_load("wp_esi_technology")
df_industry <- db_load("wp_esi_industry")
df_function <- db_load("wp_esi_function")
# cross-tables
df_entity_tag <- db_load("wp_esi_tag_entity")
df_entity_technology <- db_load("wp_esi_technology_entity")
df_entity_industry <- db_load("wp_esi_entity_industry")
# All tables are loaded; release the database connection.
dbDisconnect(mydb)
# stats: print the row count of each table and export it as CSV
nrow(df_technology)
write.csv(df_technology, "technology.csv", row.names = FALSE)
nrow(df_industry)
write.csv(df_industry, "industry.csv", row.names = FALSE)
nrow(df_entity_technology)
write.csv(df_entity_technology, "entity_technology.csv", row.names = FALSE)
# Fixed: the data frame is named df_function (was misspelled df_functon,
# which fails with "object not found").
# NOTE(review): the output file name "functon.csv" is kept unchanged in case
# something downstream expects it - confirm and rename if not.
nrow(df_function)
write.csv(df_function, "functon.csv", row.names = FALSE)
nrow(df_entity_industry)
write.csv(df_entity_industry, "entity_industry.csv", row.names = FALSE)
# Merge ------------------------------------------------------------------
# Add "source" feature
# Label every row with the table it came from.
df_tag$source <- "tag"
df_technology$source <- "technology"
# A `description` column is set to NA on the industry and function tables
# before they are stacked with the tag and technology tables.
# NOTE(review): presumably these two tables have no description column of
# their own and this aligns them for rbind() - confirm against the schema.
df_industry$description <- NA
df_industry$source <- "industry"
df_function$description <- NA
df_function$source <- "function"
# Merge all: stack the four tables and sort by name.
df <- rbind(df_tag, df_technology, df_industry, df_function) %>%
  arrange(name)
# FALSE instead of F: F is an ordinary variable that can be reassigned.
write.csv(df, "tag_tech_ind_func.csv", row.names = FALSE)
# # 4. Save as a cached file
# write_feather(df, paste0("data/", table))
#
# "News" tables analysis
#
# "wp_esi_news_entity"
# "wp_esi_news_radar"
# "wp_esi_news_function"
# "wp_esi_news_industry"
# "wp_esi_tag_news"
# "wp_esi_technology_news"
library(shinydashboard)
library(RMySQL)
#library(sqldf)
library(feather)
library(ggplot2)
library(dplyr)
library(reshape2)
library(lubridate)
library(plotly)
# 1. Connect to db
# The password is read from the ESI_DB_PASSWORD environment variable so that
# the secret is not stored in version control. User, database name and host
# can be overridden through ESI_DB_USER, ESI_DB_NAME and ESI_DB_HOST; when
# these are unset the previous values are used.
if (!nzchar(Sys.getenv("ESI_DB_PASSWORD"))) {
  stop(
    "Set the ESI_DB_PASSWORD environment variable before running this script.",
    call. = FALSE
  )
}
mydb <- dbConnect(
  MySQL(),
  user = Sys.getenv("ESI_DB_USER", unset = "analyst"),
  password = Sys.getenv("ESI_DB_PASSWORD"),
  dbname = Sys.getenv("ESI_DB_NAME", unset = "esi_management"),
  host = Sys.getenv(
    "ESI_DB_HOST",
    unset = "lecanaldb.c12hbxfn3xzn.eu-west-1.rds.amazonaws.com"
  ),
  port = 3306
)
# 2. List of tables
dbListTables(mydb)
# 3. Load tables
# Load every row of a database table and cache it as a feather file.
#
# Args:
#   table: name of the table to read; also used as the cache file name
#          under "data/".
#
# Returns:
#   The table contents as a data frame.
#
# Uses the global connection `mydb`.
db_load <- function(table) {
  rs <- dbSendQuery(mydb, paste0("select * from ", table))
  # Release the result set even when fetching or writing fails.
  on.exit(dbClearResult(rs), add = TRUE)
  df <- fetch(rs, n = -1)
  # The cache directory may not exist yet (e.g. on a fresh checkout).
  dir.create("data", showWarnings = FALSE)
  write_feather(df, file.path("data", table))
  # Return the data explicitly rather than relying on the value of
  # write_feather(), which callers assign to their df_* variables.
  df
}
# 3.2. Load all tables
# <main>
df_entity <- db_load("wp_esi_entity")
# basic for cross
df_tag <- db_load("wp_esi_tag")
df_technology <- db_load("wp_esi_technology")
df_industry <- db_load("wp_esi_industry")
df_function <- db_load("wp_esi_function")
# news tables
df_news <- db_load("wp_esi_news")
df_news_entity <- db_load("wp_esi_news_entity")
# other tables
df_media <- db_load("wp_esi_media")
df_region <- db_load("wp_esi_region")
df_type <- db_load("wp_esi_type")
# All tables are loaded; release the database connection.
dbDisconnect(mydb)
# Feature engineering
# Build a flat view of the news table: attach the entity, media, type and
# region rows to each news row, keep the title/date/url/name columns, give
# them readable names and convert publish_date to Date.
df_news_view <- local({
  # The joins run in exactly this order: the suffixed column names selected
  # below (url.x, name.x, name.y, name.x.x, name.y.y) are produced by the
  # successive joins, so reordering them would change those names.
  joined <- left_join(df_news, df_news_entity, by = c("id" = "news_id"))
  joined <- left_join(joined, df_entity, by = c("entity_id" = "id"))
  joined <- left_join(joined, df_media, by = c("media_id" = "id"))
  joined <- left_join(joined, df_type, by = c("type_id" = "id"))
  joined <- left_join(joined, df_region, by = c("region_id" = "id"))

  kept <- subset(
    joined,
    select = c(
      id, title, publish_date, url.x,
      name.x, name.y, name.x.x, name.y.y
    )
  )

  renamed <- rename(
    kept,
    url = url.x,
    entity_name = name.x,
    media_name = name.y,
    type_name = name.x.x,
    region_name = name.y.y
  )

  mutate(renamed, publish_date = as.Date(publish_date))
})
# All columns in merge data set
# write.csv(colnames(df_news_view), "c.txt", row.names = F)
# df_news_view <- left_join(df_news_view, df_news_entity, by = c("id" = "news_id"))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment