fix

80e87985 · Andrii · 226d9203 · 80e87985 · 80e87985 · 226d9203
Commit 80e87985 authored Apr 04, 2017 by Andrii
9 changed files
--- a/.gitignore
+++ b/.gitignore
+# R files
 .Rproj.user
 .Rhistory
 .RData
 .Ruserdata
+
+# data files
+data
+info
+rsconnect
+
--- a/app.R
+++ b/app.R
--- a/data.zip
+++ b/data.zip
--- a/data/wp_esi_entity
+++ b/data/wp_esi_entity
--- a/data/wp_esi_media
+++ b/data/wp_esi_media
--- a/data/wp_esi_news
+++ b/data/wp_esi_news
--- a/data/wp_esi_tag
+++ b/data/wp_esi_tag
--- a/research_by_tech.R
+++ b/research_by_tech.R
-#
-# select companies by tech
-#
-
-library(shinydashboard)
-library(RMySQL)
-#library(sqldf)
-library(feather)
-library(ggplot2)
-library(dplyr)
-library(reshape2)
-library(lubridate)
-library(plotly) 
-
-# 1. Connect to db
-# NOTE(review): the credentials are embedded in the script itself; consider
-# supplying them from outside the source (e.g. via Sys.getenv()).
-mydb <- dbConnect(
-  MySQL(),
-  host = 'lecanaldb.c12hbxfn3xzn.eu-west-1.rds.amazonaws.com',
-  port = 3306,
-  dbname = 'esi_management',
-  user = 'analyst',
-  password = 'exa_analyst1&'
-)
-
-# 2. List of tables available on this connection
-dbListTables(mydb)
-
-# 3. Load tables
-
-# Fetch every row of `table` over the open `mydb` connection, write it as a
-# feather file to data/<table>, and return the data frame.
-#
-# table: name of a table in the connected database (character scalar).
-# Returns the fetched data frame.
-db_load <- function(table) {
-  rs <- dbSendQuery(mydb, paste0("select * from ", table))
-  # DBI expects every dbSendQuery() result to be cleared; on.exit() does so
-  # even when fetch() or write_feather() fails.
-  on.exit(dbClearResult(rs), add = TRUE)
-  df <- fetch(rs, n = -1)
-  write_feather(df, paste0("data/", table))
-  # Return the data explicitly instead of relying on write_feather()'s value.
-  df
-}
-
-# 3.2. Load all tables (each db_load() call also writes a feather copy to data/<table>)
-# Main table: entities
-df_entity <- db_load("wp_esi_entity")
-
-# Base tables behind the cross-tables (presumably what the link rows point at; confirm against the schema)
-df_tag <- db_load("wp_esi_tag")
-df_technology <- db_load("wp_esi_technology")
-df_industry <- db_load("wp_esi_industry")
-df_function <- db_load("wp_esi_function")
-
-# Cross-tables relating entities to tags, technologies and industries (going by the table names)
-df_entity_tag <- db_load("wp_esi_tag_entity")
-df_entity_technology <- db_load("wp_esi_technology_entity")
-df_entity_industry <- db_load("wp_esi_entity_industry")
-
-# stats: row count of each table, followed by a CSV export of that table
-nrow(df_technology)
-write.csv(df_technology, "technology.csv", row.names = FALSE)
-
-nrow(df_industry)
-write.csv(df_industry, "industry.csv", row.names = FALSE)
-
-nrow(df_entity_technology)
-write.csv(df_entity_technology, "entity_technology.csv", row.names = FALSE)
-
-# This used to reference `df_functon`, which is never defined; the table is
-# loaded as `df_function`. The output file name keeps its original spelling
-# so anything that already reads "functon.csv" still finds it.
-nrow(df_function)
-write.csv(df_function, "functon.csv", row.names = FALSE)
-
-nrow(df_entity_industry)
-write.csv(df_entity_industry, "entity_industry.csv", row.names = FALSE)
-
-# Merge ------------------------------------------------------------------
-
-# Add "source" feature: label each table's rows with the table they came from
-df_tag$source <- "tag"
-
-df_technology$source <- "technology"
-
-# Industry and function get an NA `description` before being stacked with
-# tag/technology (presumably those two already carry one -- TODO confirm),
-# because rbind() on data frames needs matching columns.
-df_industry$description <- NA
-df_industry$source <- "industry"
-
-df_function$description <- NA
-df_function$source <- "function"
-
-# Merge all: stack the four tables and sort the result by name
-df <- df_tag %>%
-  rbind(df_technology) %>%
-  rbind(df_industry) %>%
-  rbind(df_function) %>%
-  arrange(name)
-
-# FALSE rather than F: F is an ordinary variable that can be reassigned
-write.csv(df, "tag_tech_ind_func.csv", row.names = FALSE)
-
-# # 4. Save as a cached file
-# write_feather(df, paste0("data/", table))  
--- a/research_news.R
+++ b/research_news.R
-#
-# "News" tables analysis
-#
-
-# "wp_esi_news_entity"
-# "wp_esi_news_radar"
-# "wp_esi_news_function"
-# "wp_esi_news_industry"
-# "wp_esi_tag_news"
-# "wp_esi_technology_news"
-
-library(shinydashboard)
-library(RMySQL)
-#library(sqldf)
-library(feather)
-library(ggplot2)
-library(dplyr)
-library(reshape2)
-library(lubridate)
-library(plotly) 
-
-# 1. Connect to db
-# NOTE(review): the credentials are embedded in the script itself; consider
-# supplying them from outside the source (e.g. via Sys.getenv()).
-mydb <- dbConnect(
-  MySQL(),
-  host = 'lecanaldb.c12hbxfn3xzn.eu-west-1.rds.amazonaws.com',
-  port = 3306,
-  dbname = 'esi_management',
-  user = 'analyst',
-  password = 'exa_analyst1&'
-)
-
-# 2. List of tables available on this connection
-dbListTables(mydb)
-
-# 3. Load tables
-
-# Fetch every row of `table` over the open `mydb` connection, write it as a
-# feather file to data/<table>, and return the data frame.
-#
-# table: name of a table in the connected database (character scalar).
-# Returns the fetched data frame.
-db_load <- function(table) {
-  rs <- dbSendQuery(mydb, paste0("select * from ", table))
-  # DBI expects every dbSendQuery() result to be cleared; on.exit() does so
-  # even when fetch() or write_feather() fails.
-  on.exit(dbClearResult(rs), add = TRUE)
-  df <- fetch(rs, n = -1)
-  write_feather(df, paste0("data/", table))
-  # Return the data explicitly instead of relying on write_feather()'s value.
-  df
-}
-
-# 3.2. Load all tables (each db_load() call also writes a feather copy to data/<table>)
-# Main table: entities
-df_entity <- db_load("wp_esi_entity")
-
-# Base tables: tag, technology, industry, function
-df_tag <- db_load("wp_esi_tag")
-df_technology <- db_load("wp_esi_technology")
-df_industry <- db_load("wp_esi_industry")
-df_function <- db_load("wp_esi_function")
-
-# News items and the news-to-entity table (joined on news_id below)
-df_news <- db_load("wp_esi_news")
-df_news_entity <- db_load("wp_esi_news_entity")
-
-# Tables joined onto the news view below: media, region, type
-df_media <- db_load("wp_esi_media")
-df_region <- db_load("wp_esi_region")
-df_type <- db_load("wp_esi_type")
-
-# Feature engineering
-# Attach the entity, media, type and region rows to each news item, then keep
-# only the display columns under readable names.
-df_news_view <- df_news %>%
-  left_join(df_news_entity, by = c("id" = "news_id")) %>%
-  left_join(df_entity, by = c("entity_id" = "id")) %>%
-  left_join(df_media, by = c("media_id" = "id")) %>%
-  left_join(df_type, by = c("type_id" = "id")) %>%
-  left_join(df_region, by = c("region_id" = "id")) %>%
-  # The .x/.y suffixes are the ones left_join() adds to clashing column names
-  select(
-    id, title, publish_date,
-    url = url.x,
-    entity_name = name.x,
-    media_name = name.y,
-    type_name = name.x.x,
-    region_name = name.y.y
-  ) %>%
-  mutate(publish_date = as.Date(publish_date))
-
-# All columns in merge data set
-# write.csv(colnames(df_news_view), "c.txt", row.names = F)
-
-
-# df_news_view <- left_join(df_news_view, df_news_entity, by = c("id" = "news_id"))