-5CG8171Y0H.Rhistory

IGZdta[IGZdta$Indicator =="Participation Rate","IndicatorFullName"] <- "Participation Rate (%)"
IGZdta[IGZdta$Indicator =="Depopulation","IndicatorFullName"] <- "Depopulation Index"
IGZdta[IGZdta$Indicator == "Attainment", "IndicatorFullName"] <- "Average Highest Attainment"
#save CSV data file
write_excel_csv(IGZdta ,file = "data/IGZcleandata.csv")
##DZ data
DZdta <- read_excel("data/Final data - Aug 2022/Final DZ Data - Aug 22.xlsx",
sheet = 2)
for (i in 3:7) {
DZdta2 <- read_excel("data/Final data - Aug 2022/Final DZ Data - Aug 22.xlsx",
sheet = i)
DZdta <- left_join(DZdta, DZdta2, by = "DZ")
}
#combine columns into 1 long variable
DZdta <- gather(DZdta, Indicator_Type_Year, value, -1,-2,-3,-4)
#separate 1 column into 3
DZdta <- separate(DZdta, Indicator_Type_Year, c("Indicator","Type","Year"), sep = "_")
#save CSV data file
write_excel_csv(DZdta ,file = "data/DZcleandata.csv")
##File for adding family group info to CPP dta
#RUN AFTER Clean data.R
##After this run ShapefilesCode.R
###Match family groups to CPPs
library(tidyverse)
library(readxl)
#read in family group data
FGdta <- read_excel("data/Family Groups.xlsx")
names(FGdta)[1] <- "CPP"
FGdta[FGdta$CPP == "Edinburgh City",1] <- "Edinburgh, City of"
##read in CPP data
CPPdta <- read_csv("data/CPPcleandata.csv")
##match all FGs to CPP data
CPPdta <- left_join(CPPdta, FGdta, by = c("CPP" = "CPP"))
###if the indicator is rurality replace the FG - decided not to do this
#CPPdta$FG <-with(CPPdta, ifelse(Indicator %in% c("Fragility","Fuel Poverty"), popdensityFG_2012, deprivationFG_2012))
#CPPdta <- CPPdta[c(1:5,8)]
CPPdta$FG <- CPPdta$deprivationFG_2012
CPPdta <- CPPdta[c(1:6,9)]
write_csv(CPPdta, "data/CPPcleandata.csv")
##File for data prep.
#RUN AFTER clean data.R and AddFGtoData.R and ShapefileCode.R
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
library(plyr)
library(tidyverse)
library(DT)
library(data.table)
library(readxl)
#Store value for the start year and most recent year data is available, this needs to be changed when data is refreshed annually
StrtYear <- "2009/10"
RcntYear <- "2020/21"
SpPolysDF <- read_rds("data/Shapes.rds")
SpPolysIZ <- read_rds("data/IZshapes.rds")
CPPdta <- read_csv("data/CPPcleandata.csv")
IGZdta <- read_csv("data/IGZcleandata.csv")
DZdta <- read_csv("data/DZcleandata.csv")
#read Fife strategic areas for bespoke analysis
fife_sa <- read_excel("data/Fife Strategic Areas.xlsx")
#Calculate percentiles for map colours------------
povDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 13], 7)
povDecs <-c(povDecs,x)
}
tariffDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 14], 7)
tariffDecs <-c(tariffDecs,x)
}
benDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 15], 7)
benDecs <-c(benDecs,x)
}
crimeDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 16], 7)
crimeDecs <-c(crimeDecs,x)
}
admisDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 17], 7)
admisDecs <-c(admisDecs,x)
}
SpPolysDF@data <- cbind(SpPolysDF@data, povDecs, tariffDecs,benDecs,crimeDecs, admisDecs)
rm(i, x, povDecs, tariffDecs,benDecs,crimeDecs, admisDecs)
# Create CPP Scores and Type Scores for most recent years data----------
IGZdta <- IGZdta %>% mutate(`High is Positive?` = "Yes")
IGZdta$`High is Positive?`[IGZdta$Indicator %in% c("Child Poverty","Out of Work Benefits",
"Crime Rate", "Emergency Admissions",
"Early Mortality", "Depopulation")] <- "No"
IGZ_latest <- filter(IGZdta, Year == RcntYear)
# CPP Score
IGZ_latest <- ddply(
IGZ_latest,.
(CPP, Indicator),
transform,
CPPMean = (mean(value))
)
IGZ_latest$Differences <- IGZ_latest$value - IGZ_latest$CPPMean
IGZ_latest <- ddply(
IGZ_latest,.
(CPP, Indicator),
transform,
StdDev = (sd(value))
)
IGZ_latest$ZScore <- IGZ_latest$Differences / IGZ_latest$StdDev
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_latest$CPPScore <- IGZ_latest$ZScore
IGZ_latest$CPPScore[IGZ_latest$High.is.Positive. == "No"] <-
(IGZ_latest$CPPScore[IGZ_latest$High.is.Positive. =="No"]) * -1
IGZ_latest <- select(IGZ_latest, c(-CPPMean, -Differences, -StdDev, -ZScore))
# Type Score
IGZ_latest <- ddply(
IGZ_latest,.
(Typology_Group, Indicator),
transform,
TypeMean = (mean(value))
)
IGZ_latest$Differences <- IGZ_latest$value - IGZ_latest$TypeMean
IGZ_latest <- ddply(
IGZ_latest,.
(Typology_Group, Indicator),
transform,
StdDev = (sd(value))
)
IGZ_latest$ZScore <- IGZ_latest$Differences / IGZ_latest$StdDev
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_latest$TypeScore <- IGZ_latest$ZScore
IGZ_latest$TypeScore[IGZ_latest$High.is.Positive. =="No"] <-
(IGZ_latest$TypeScore[IGZ_latest$High.is.Positive. =="No"]) * -1
IGZ_latest <- select(IGZ_latest, c(-TypeMean, -Differences, -StdDev, -ZScore))
##get latest scores for Fife Strategic Areas================
# CPP Score
IGZ_latest_Fife <- IGZ_latest %>% filter(CPP == "Fife") %>%
left_join(fife_sa[c(1,4)], by = c(InterZone = "AreaCode"))
IGZ_latest_Fife <- ddply(
IGZ_latest_Fife,.
(`Strategic Area`, Indicator),
transform,
CPPMean = (mean(value))
)
IGZ_latest_Fife$Differences <- IGZ_latest_Fife$value - IGZ_latest_Fife$CPPMean
IGZ_latest_Fife <- ddply(
IGZ_latest_Fife,.
(`Strategic Area`, Indicator),
transform,
StdDev = (sd(value))
)
IGZ_latest_Fife$ZScore <- IGZ_latest_Fife$Differences / IGZ_latest_Fife$StdDev
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_latest_Fife$CPPScore <- IGZ_latest_Fife$ZScore
IGZ_latest_Fife$CPPScore[IGZ_latest_Fife$High.is.Positive. == "No"] <-
(IGZ_latest_Fife$CPPScore[IGZ_latest_Fife$High.is.Positive. =="No"]) * -1
IGZ_latest_Fife <- select(IGZ_latest_Fife, c(-CPPMean, -Differences, -StdDev, -ZScore))
# Create CPP Scores & Typology Scores for the change from start to finish year--------------
# calculate overall zscore for change
IGZ_change <- filter(IGZdta, Year %in% c(StrtYear,RcntYear))
# need to group by CPP + IGZ here to stop IGZ's with the same name getting included in the wrong group
IGZ_change <- ddply(
IGZ_change,.
(InterZone, Indicator),
transform,
Change = (last(value) / first(value) -1)
)
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_change$Change[IGZ_change$High.is.Positive. == "No"] <-
(IGZ_change$Change[IGZ_change$High.is.Positive. == "No"]) * -1
# Filtering data so that change value is only included once per IGZ
IGZ_change <- filter(IGZ_change, Year == RcntYear)
IGZ_change <- ddply(
IGZ_change,.
(Indicator),
transform,
OverallMean = (mean(Change))
)
IGZ_change$Differences <- IGZ_change$Change - IGZ_change$OverallMean
IGZ_change <- ddply(
IGZ_change,.
(Indicator),
transform,
StdDev = (sd(Change))
)
IGZ_change$OverallZScore <- IGZ_change$Differences / IGZ_change$StdDev
IGZ_change <- select(IGZ_change, c(-OverallMean, -Differences, -StdDev))
# Calculate CPP Change Score
IGZ_change <- ddply(
IGZ_change,.
(CPP, Indicator),
transform,
CPPMean = (mean(OverallZScore))
)
IGZ_change$Differences <- IGZ_change$OverallZScore - IGZ_change$CPPMean
IGZ_change <- ddply(
IGZ_change,.
(CPP, Indicator),
transform,
StdDev = (sd(OverallZScore))
)
IGZ_change$CPPChangeScore <- IGZ_change$Differences / IGZ_change$StdDev
IGZ_change <- select(IGZ_change, c(-CPPMean, -Differences, -StdDev))
# Calculate FIFE SA Change Scores
IGZ_change_Fife <- IGZ_change %>% filter(CPP == "Fife") %>%
left_join(fife_sa[c(1,4)], by = c(InterZone = "AreaCode"))
IGZ_change_Fife <- ddply(
IGZ_change_Fife,.
(`Strategic Area`, Indicator),
transform,
CPPMean = (mean(OverallZScore))
)
IGZ_change_Fife$Differences <- IGZ_change_Fife$OverallZScore - IGZ_change_Fife$CPPMean
IGZ_change_Fife <- ddply(
IGZ_change_Fife,.
(Strategic.Area, Indicator),
transform,
StdDev = (sd(OverallZScore))
)
IGZ_change_Fife$CPPChangeScore <- IGZ_change_Fife$Differences / IGZ_change_Fife$StdDev
IGZ_change_Fife <- select(IGZ_change_Fife, c(-CPPMean, -Differences, -StdDev))
# Calculate Typology Change Score
IGZ_change <- ddply(
IGZ_change,.
(Typology_Group, Indicator),
transform,
TypeMean = (mean(OverallZScore))
)
IGZ_change$Differences <- IGZ_change$OverallZScore - IGZ_change$TypeMean
IGZ_change <- ddply(
IGZ_change,.
(Typology_Group, Indicator),
transform,
StdDev = (sd(OverallZScore))
)
IGZ_change$TypeChangeScore <- IGZ_change$Differences / IGZ_change$StdDev
IGZ_change <- select(IGZ_change, c(-TypeMean, -Differences, -StdDev))
# Add Z score column to SpPolysDF to allow ranking in this DataFrame ---------------------
decs <- c()
# need to group by CPP + IGZ here to stop IGZ's with the same name getting included in the wrong group
decs <- ddply(IGZ_latest,.(InterZone, CPP), summarise, combCPP = sum(CPPScore)) %>%
ddply(., .(CPP), mutate, CPPDec = ntile(combCPP, n = 7)) %>%
ddply(.,.(CPP), mutate, CPPRank = frank(combCPP)) %>%
select(InterZone, CPPDec,CPPRank)
SpPolysIZ@data <- left_join(SpPolysIZ@data, decs, by = "InterZone") %>% select(-rank_decs, -`rank-min`)
names(SpPolysIZ@data)[c(13,14)] <- c("rank_decs", "rank-min")
saveRDS(SpPolysDF, "data/Shapes_decs.rds")
saveRDS(SpPolysIZ, "data/IZshapes_decs.rds")
write_csv(IGZ_change,"data/IGZ_change.csv")
write_csv(IGZ_latest,"data/IGZ_latest.csv")
#tidy and write Fife files
IGZ_latest_Fife <- IGZ_latest_Fife %>% select(c(`Strategic Area`, InterZone, CPPScore, Indicator)) %>% rename(SAScore = CPPScore)
IGZ_change_Fife <- IGZ_change_Fife %>% select(c(`Strategic.Area`, InterZone, CPPChangeScore, Indicator)) %>% rename(SAChangeScore = CPPChangeScore)
write_csv(IGZ_latest_Fife,"data/IGZ_latest_Fife.csv")
write_csv(IGZ_change_Fife,"data/IGZ_change_Fife.csv")
# Compute CPP improvement rates and store data for plots on CPP over time page--------
CPP_dta_current <- CPPdta %>%
mutate(LineType =  paste(CPP, Type)) %>%
filter(Type != "Projected")
CPP_dta_current <- setDT(CPP_dta_current)[, Improvement_Rate :=
(last(value) / first(value) -1) * 100,
by = list(CPP, Indicator)
]
CPP_dta_current <- CPP_dta_current %>% mutate(`High is Positive?` = "Yes")
CPP_dta_current$`High is Positive?`[CPP_dta_current$Indicator %in% c("Dwelling Fires",
"Unplanned Hospital Attendances",
"Fuel Poverty", "Fragility",
"Carbon Emissions", "Child Poverty",
"Out of Work Benefits", "Crime Rate",
"Emergency Admissions",
"Early Mortality")
] <- "No"
write_csv(CPP_dta_current, "data/Imp_rate_CPP.csv")
##This code creates a tidy table for the "Vulnerable Communities" page
##Note that the IGZs selected as most vulnerable do not change, but they can be updated to reflect
##new data using the "Calculate most vuln communities.R" code
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
library(plyr) ###plyr causes issues if loaded after dplyr, so I recommend restarting before running
library(tidyverse)
###NO LONGER NEED THIS AS GOING TO SELECT EARLIEST AND MOST RECENT YEAR
#StartYear <- "2006/07"
#EndYear   <- "2017/18"
CPPNames <- c("Aberdeen City", "Aberdeenshire", "Angus", "Argyll and Bute",
"Clackmannanshire", "Dumfries and Galloway","Dundee City",
"East Ayrshire", "East Dunbartonshire", "East Lothian",
"East Renfrewshire", "Edinburgh, City of", "Eilean Siar",
"Falkirk", "Fife", "Glasgow City", "Highland",
"Inverclyde", "Midlothian", "North Ayrshire",
"North Lanarkshire", "Orkney Islands", "Perth and Kinross",
"Renfrewshire", "Scottish Borders",
"Shetland Islands", "South Ayrshire", "South Lanarkshire",
"Stirling", "West Dunbartonshire", "West Lothian")
IGZData <- read_csv("data/IGZcleandata.csv")[c(1:9)]
colnames(IGZData)[1] <- "IGZ"
IGZData <- filter(IGZData, Type == "Raw data") %>% select(-Type)
##get labels for later
Labels <- IGZData[1:2]
IGZData <- IGZData[-2]
#Calculations for table 1 --------------------------------------------------------------------------------
#Calculate Change and CPPChangeScore----------------------------------------------------------------------
IGZData$HighIsPos <- "No"
IGZData$HighIsPos[IGZData$Indicator %in% c("Attainment", "Participation Rate")] <- "Yes"
#Calculate %Change from First year to last year
TableData <- IGZData %>%
group_by(Indicator) %>%
filter(Year %in% c(first(Year), last(Year)))
yrs1 <- TableData %>% group_by(Indicator) %>%filter(Year == first(Year)) %>% mutate(YearRef = 1)
yrs2 <- TableData %>% group_by(Indicator) %>%filter(Year == last(Year)) %>% mutate(YearRef = 2)
TableData <-rbind(yrs1,yrs2)
TableData <- TableData[order(TableData$YearRef),]
TableData[,7] <- round(TableData[,7],2)
TableData <- ddply(
TableData,.
(IGZ, Indicator),
transform,
Change = ((last(value) / first(value))-1)*100
)
#Calculate the average of this change for each CPP
TableData <- ddply(
TableData,.
(CPP, Indicator, YearRef),
transform,
CPPChangeMean = mean(Change)
)
#Calculate the Standard Deviation of this change for each CPP
TableData <- ddply(
TableData,.
(CPP, Indicator, YearRef),
transform,
CPPChangeSD = sd(Change)
)
#Calculate difference between change value and CPPChangeMean
TableData$Differences <- TableData$Change - TableData$CPPChangeMean
#Calculate CPPChangeScore (z score - describes how many standard deviations the change in value is from the
#mean change in value for that CPP)
TableData$CPPChangeScore <- TableData$Differences / TableData$CPPChangeSD
#Remove columns which are not needed, CPPChangeMean, CPPChangeSD & Differences
TableData <- TableData[,-c(11:13)]
#Multiply change values and scores by -1 so that improvement is always measured by a positive value
TableData$CPPChangeScore[TableData$HighIsPos == "No"] <- TableData$CPPChangeScore[TableData$HighIsPos == "No"]*-1
#Calculate combined change and type scores, combining the individual scores for each outcome
TableData <- ddply(
TableData,.
(IGZ, YearRef),
transform,
CombinedCPPChangeScore = sum(CPPChangeScore)
)
#Remove columns which are not needed, CPPChangeScore, TypeChangeScore
TableData <- TableData[,-11]
#Add in CPP average of values to include as bottom row
TableData <- ddply(
TableData,.
(CPP, Indicator, YearRef),
transform,
CPPAverage = mean(value)
)
#Format data table-----------------------------------------------------------------------------------------
#Separate out change values only and format
ChangeData <- TableData[,c(1,2,3,4,5,6,8,9,10,12)]
ChangeData$Label <- "Change"
#Calculate change over time in CPP average
ChangeData <- ddply(
ChangeData,.
(IGZ, Indicator),
transform,
ChangeAv = ((last(CPPAverage) / first(CPPAverage))-1)*100
)
#Multiply change values and scores by -1 so that improvement is always measured by a positive value
ChangeData$Change[ChangeData$HighIsPos == "No"] <- ChangeData$Change[ChangeData$HighIsPos == "No"]*-1
ChangeData$ChangeAv[ChangeData$HighIsPos == "No"] <- ChangeData$ChangeAv[ChangeData$HighIsPos == "No"]*-1
ChangeData$Rate <- ifelse(
ChangeData$Change > ChangeData$ChangeAv,
"Faster",
ifelse(
ChangeData$Change < ChangeData$ChangeAv,
"Slower",
"No Difference"
)
)
ChangeData <- ChangeData[,-c(7,8,9,10,12)]
ChangeData <- ChangeData %>% unite(DataSpec, Indicator, Year, Label, sep = "_")
ChangeData <- ChangeData %>% spread(DataSpec, Rate)
#Only want to keep 1 year for each outcome so remove first year values
ChangeData <- ChangeData[,-c(5,7,9,11,13,15,17,19)]
#Separate out CPPScores only and format
CPPScoreData <- TableData[,c(1,2,3,4,5,6,8,9,11)]
CPPScoreData$Label <- "CPPScore"
CPPScoreData <- CPPScoreData %>% unite(DataSpec, Indicator, Year, Label, sep = "_")
CPPScoreData <- CPPScoreData[,-c(6:7)]
CPPScoreData <- CPPScoreData %>% spread(DataSpec, CombinedCPPChangeScore)
CPPScoreData[,c(5:20)] <- ifelse(
CPPScoreData[,c(5:20)] > 0,
"Faster",
ifelse(
CPPScoreData[,c(5:20)] == 0,
"No Difference",
"Slower"
)
)
#Only want to keep 1 column, as the score should be the same for every year and every outcome
CPPScoreData <- CPPScoreData[,-c(5:19)]
#Seperate out CPPMean only and format
CPPMeanData <- TableData[,c(1,2,3,4,5,6,8,9,12)]
CPPMeanData$Label <- "CPPMean"
CPPMeanData <- CPPMeanData %>% unite(DataSpec, Indicator, Year, sep = "_")
CPPMeanData <- CPPMeanData[,-c(6:7)]
CPPMeanData <- CPPMeanData %>% spread(DataSpec, CPPAverage)
#Format TableData
TableData <- TableData[,c(1:7)]
TableData$Label <- "Value"
TableData <- TableData %>% unite(DataSpec, Indicator, Year, sep = "_")
TableData <- TableData %>% spread(DataSpec, value)
#Add Vulnerable Community Labels to data and filter to only include these
DeprivedCommData <- read_csv("data/Vulnerable communities - 2006.csv")
DeprivedCommData <- filter(DeprivedCommData, revRank %in% 1:5) %>%
select(c("InterZone", "revRank"))
colnames(DeprivedCommData) <- c("IGZ","Most_Deprived_Comm")
DeprivedCommData <- DeprivedCommData[c(2,1)]
TableData <- merge(TableData, DeprivedCommData)
TableData <- TableData[order(TableData$Most_Deprived_Comm),]
CPPMeanData <- merge(CPPMeanData, DeprivedCommData)
CPPMeanData <- filter(CPPMeanData, `Most_Deprived_Comm` == 1)
total <- rbind(TableData, CPPMeanData)
total <- merge(total, ChangeData)
total <- merge(total, CPPScoreData)
total <- unique(left_join(total, Labels, by = "IGZ"))
total <- total[,-c(1,3,4)]
#relabel in correct format, format in correct order and round values
total$Most_Deprived_Comm[total$Label == "CPPMean"] <- "6"
total <- total[order(total$Most_Deprived_Comm),]
total$Most_Deprived_Comm[total$Most_Deprived_Comm == 1] <- "1st Most Vulnerable"
total$Most_Deprived_Comm[total$Most_Deprived_Comm == 5] <- "5th Most Vulnerable"
total$AreaLabel <- total$CPP
total$AreaLabel[total$Label == "CPPMean"] <- "CPP Average"
total <- total[,-2]
total <- total[,c(29,1,18,27,4,5,20,6,7,21,8,9,22,10,11,23,12,13,24,14,15,25,16,17,26,2,3,19,28)]
total[,c(5,6,11,12,20,21,23,24,26,27)] <- round(total[,c(5,6,11,12,20,21,23,24,26,27)],2)
total[,c(8,9,14,15,17,18)] <- round(total[,c(8,9,14,15,17,18)],0)
total[total$AreaLabel =="CPP Average","InterZone_Name"] <- paste(total[total$AreaLabel =="CPP Average","CPP"], "Average")
#Save formatted data
write_excel_csv(total, file = "data/Formatted Vulnerable Communities.csv")
###Calculate the rankings and deciles for income deprivation
#Run this before PrepDecileData.R
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
library(readxl)
library(dplyr)
library(readr)
detach(package:plyr)
##read dataset
DZdta <- read_excel("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP/data/DZ & IGZ income data.xlsx", sheet = 2)
IGZdta <- read_excel("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP/data/DZ & IGZ income data.xlsx", sheet = 3)[-4]
names(IGZdta)[5] <- "CPP"
##get count of dzs, get ranks and deciles
DZdta$count <- ave(DZdta$Council_area, DZdta$Council_area, FUN = length)
DZdta <- DZdta %>% group_by(Council_area) %>%
mutate(decl = ntile(`%`, 10))%>% ungroup()
#just double check IGZ ranks
IGZdta$count <- ave(IGZdta$CPP, IGZdta$CPP, FUN = length)
IGZdta<- IGZdta %>% dplyr::group_by(CPP) %>%
mutate(decl = ntile(`% of income deprived`, 10)) %>% ungroup()
sum(IGZdta$`Decile with CPP` != IGZdta$decl)
#so 345 don't match - which ones?
noMatch <- IGZdta[IGZdta$decl != IGZdta$`Decile with CPP`,]
##most mismatches are to do with rounding, but some issues e.g. Eilean SIar has
#no decile 1 in previous figs, so will keep new deciles
##Need to make decile 9 for isles into decile 10 as they have below 10 IGZs
IGZdta[IGZdta$CPP== "Shetland Islands"&IGZdta$decl== 7, "decl"] <- 10
IGZdta[IGZdta$CPP== "Orkney Islands"&IGZdta$decl== 6, "decl"] <- 10
IGZdta[IGZdta$CPP== "Eilean Siar"&IGZdta$decl== 9, "decl"] <- 10
##Get Scotland deciles
IGZdta <- IGZdta %>% mutate(ScotDecl = ntile(`% of income deprived`, 10))
DZdta <- DZdta %>% mutate(ScotDecl = ntile(`%`,10))
IGZDeciles <- IGZdta[c(1,5,10,11)]
colnames(IGZDeciles) <- c("AreaCode", "CouncilName", "Decile", "ScotlandDec")
DZDeciles <- DZdta[c(1,2,8,9)]
colnames(DZDeciles) <- c("AreaCode", "CouncilName", "Decile", "ScotlandDec")
#merge and save
allDta <- rbind(IGZDeciles[1:4],DZDeciles[1:4])
write_csv(allDta, "C:/Users/cassidy.nicholas/OneDrive - IS/CPOP/data/IncomeDeciles.csv")
##prepare inequality data for table
##Run this after Create income deciles.R
library(tidyverse)
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
decDta <- read_csv("data/IncomeDeciles.csv")
IGZdta <- read_csv("data/IGZcleandata.csv")
IGZdta <- left_join(IGZdta, decDta, by = c("InterZone" = "AreaCode"))
##Make imputed data NAs and then remove them - in case
#  we change our mind about how to display this in future
IGZdta[IGZdta$Type=="Imputed"|IGZdta$Type=="Projected", "value"] <- NA
IGZdta <- IGZdta[complete.cases(IGZdta$value),]
scotvals <- filter(IGZdta, ScotlandDec %in% c(1,10)) %>%
group_by(ScotlandDec, Indicator, Year) %>%
summarise_at(vars(value), list(mean))
scotvals$CouncilName <- "Scotland"
names(scotvals)[1] <- "Decile"
dd <- IGZdta[c(12,11,6,8:10)] %>% filter(Decile %in% c(1,10))%>%
group_by(Decile,Year,CouncilName,Indicator) %>%
summarise_at(vars(value), list(mean))
dd <- bind_rows(dd[1:5], scotvals[1:5])
##Previously we wanted to keep every year, but no longer! - get last year
#dd <- filter(dd, Year %in% c("2006/07", "2007/08", "2008/09", "2009/10",
#                             "2010/11", "2011/12", "2012/13",
#                             "2013/14", "2014/15", "2015/16", "2016/17", "2017/18"))
dd <- dd %>% dplyr::arrange(Year) %>% group_by(Indicator)%>%
filter(Year ==last(Year))
dd[dd$Indicator == "Child Poverty",4] <- "Child Poverty (%)"
dd[dd$Indicator == "Crime Rate",4] <- "Crime Rate, per 10,000"
dd[dd$Indicator == "Early Mortality",4] <- "Early Mortality, per 100,000"
dd[dd$Indicator == "Emergency Admissions",4] <- "Emergency Admissions, per 100,000"
dd[dd$Indicator == "Out of Work Benefits",4] <- "Out of Work Benefits (%)"
dd[dd$Indicator == "Participation",4] <- "Participation Rate (%)"
dd[dd$Indicator == "Depopulation",4] <- "Depopulation Index"
dd[dd$Indicator == "Attainment",4] <- "Average Highest Attainment"
saveRDS(dd, "data/DecileData.rds")
shiny::runApp()