-
Notifications
You must be signed in to change notification settings - Fork 1
/
-5CG8171Y0H.Rhistory
512 lines (512 loc) · 21.5 KB
/
-5CG8171Y0H.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
IGZdta[IGZdta$Indicator =="Participation Rate","IndicatorFullName"] <- "Participation Rate (%)"
IGZdta[IGZdta$Indicator =="Depopulation","IndicatorFullName"] <- "Depopulation Index"
IGZdta[IGZdta$Indicator == "Attainment", "IndicatorFullName"] <- "Average Highest Attainment"
#save CSV data file
write_excel_csv(IGZdta ,file = "data/IGZcleandata.csv")
##DZ data
DZdta <- read_excel("data/Final data - Aug 2022/Final DZ Data - Aug 22.xlsx",
sheet = 2)
for (i in 3:7) {
DZdta2 <- read_excel("data/Final data - Aug 2022/Final DZ Data - Aug 22.xlsx",
sheet = i)
DZdta <- left_join(DZdta, DZdta2, by = "DZ")
}
#combine columns into 1 long variable
DZdta <- gather(DZdta, Indicator_Type_Year, value, -1,-2,-3,-4)
#separate 1 column into 3
DZdta <- separate(DZdta, Indicator_Type_Year, c("Indicator","Type","Year"), sep = "_")
#save CSV data file
write_excel_csv(DZdta ,file = "data/DZcleandata.csv")
##File for adding family group info to CPP dta
#RUN AFTER Clean data.R
##After this run ShapefilesCode.R
###Match family groups to CPPs
library(tidyverse)
library(readxl)
#read in family group data
FGdta <- read_excel("data/Family Groups.xlsx")
names(FGdta)[1] <- "CPP"
FGdta[FGdta$CPP == "Edinburgh City",1] <- "Edinburgh, City of"
##read in CPP data
CPPdta <- read_csv("data/CPPcleandata.csv")
##match all FGs to CPP data
CPPdta <- left_join(CPPdta, FGdta, by = c("CPP" = "CPP"))
###if the indicator is rurality replace the FG - decided not to do this
#CPPdta$FG <-with(CPPdta, ifelse(Indicator %in% c("Fragility","Fuel Poverty"), popdensityFG_2012, deprivationFG_2012))
#CPPdta <- CPPdta[c(1:5,8)]
CPPdta$FG <- CPPdta$deprivationFG_2012
CPPdta <- CPPdta[c(1:6,9)]
write_csv(CPPdta, "data/CPPcleandata.csv")
##File for data prep.
#RUN AFTER clean data.R and AddFGtoData.R and ShapefileCode.R
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
library(plyr)
library(tidyverse)
library(DT)
library(data.table)
library(readxl)
#Store value for the start year and most recent year data is available, this needs to be changed when data is refreshed annually
StrtYear <- "2009/10"
RcntYear <- "2020/21"
SpPolysDF <- read_rds("data/Shapes.rds")
SpPolysIZ <- read_rds("data/IZshapes.rds")
CPPdta <- read_csv("data/CPPcleandata.csv")
IGZdta <- read_csv("data/IGZcleandata.csv")
DZdta <- read_csv("data/DZcleandata.csv")
#read Fife strategic areas for bespoke analysis
fife_sa <- read_excel("data/Fife Strategic Areas.xlsx")
#Calculate percentiles for map colours------------
povDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 13], 7)
povDecs <-c(povDecs,x)
}
tariffDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 14], 7)
tariffDecs <-c(tariffDecs,x)
}
benDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 15], 7)
benDecs <-c(benDecs,x)
}
crimeDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 16], 7)
crimeDecs <-c(crimeDecs,x)
}
admisDecs <- c()
for(i in unique(SpPolysDF@data$council)){
x <- ntile(SpPolysDF@data[SpPolysDF@data$council == i, 17], 7)
admisDecs <-c(admisDecs,x)
}
SpPolysDF@data <- cbind(SpPolysDF@data, povDecs, tariffDecs,benDecs,crimeDecs, admisDecs)
rm(i, x, povDecs, tariffDecs,benDecs,crimeDecs, admisDecs)
# Create CPP Scores and Type Scores for most recent years data----------
IGZdta <- IGZdta %>% mutate(`High is Positive?` = "Yes")
IGZdta$`High is Positive?`[IGZdta$Indicator %in% c("Child Poverty","Out of Work Benefits",
"Crime Rate", "Emergency Admissions",
"Early Mortality", "Depopulation")] <- "No"
IGZ_latest <- filter(IGZdta, Year == RcntYear)
# CPP Score
IGZ_latest <- ddply(
IGZ_latest,.
(CPP, Indicator),
transform,
CPPMean = (mean(value))
)
IGZ_latest$Differences <- IGZ_latest$value - IGZ_latest$CPPMean
IGZ_latest <- ddply(
IGZ_latest,.
(CPP, Indicator),
transform,
StdDev = (sd(value))
)
IGZ_latest$ZScore <- IGZ_latest$Differences / IGZ_latest$StdDev
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_latest$CPPScore <- IGZ_latest$ZScore
IGZ_latest$CPPScore[IGZ_latest$High.is.Positive. == "No"] <-
(IGZ_latest$CPPScore[IGZ_latest$High.is.Positive. =="No"]) * -1
IGZ_latest <- select(IGZ_latest, c(-CPPMean, -Differences, -StdDev, -ZScore))
# Type Score
IGZ_latest <- ddply(
IGZ_latest,.
(Typology_Group, Indicator),
transform,
TypeMean = (mean(value))
)
IGZ_latest$Differences <- IGZ_latest$value - IGZ_latest$TypeMean
IGZ_latest <- ddply(
IGZ_latest,.
(Typology_Group, Indicator),
transform,
StdDev = (sd(value))
)
IGZ_latest$ZScore <- IGZ_latest$Differences / IGZ_latest$StdDev
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_latest$TypeScore <- IGZ_latest$ZScore
IGZ_latest$TypeScore[IGZ_latest$High.is.Positive. =="No"] <-
(IGZ_latest$TypeScore[IGZ_latest$High.is.Positive. =="No"]) * -1
IGZ_latest <- select(IGZ_latest, c(-TypeMean, -Differences, -StdDev, -ZScore))
##get latest scores for Fife Strategic Areas================
# CPP Score
IGZ_latest_Fife <- IGZ_latest %>% filter(CPP == "Fife") %>%
left_join(fife_sa[c(1,4)], by = c(InterZone = "AreaCode"))
IGZ_latest_Fife <- ddply(
IGZ_latest_Fife,.
(`Strategic Area`, Indicator),
transform,
CPPMean = (mean(value))
)
IGZ_latest_Fife$Differences <- IGZ_latest_Fife$value - IGZ_latest_Fife$CPPMean
IGZ_latest_Fife <- ddply(
IGZ_latest_Fife,.
(`Strategic Area`, Indicator),
transform,
StdDev = (sd(value))
)
IGZ_latest_Fife$ZScore <- IGZ_latest_Fife$Differences / IGZ_latest_Fife$StdDev
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_latest_Fife$CPPScore <- IGZ_latest_Fife$ZScore
IGZ_latest_Fife$CPPScore[IGZ_latest_Fife$High.is.Positive. == "No"] <-
(IGZ_latest_Fife$CPPScore[IGZ_latest_Fife$High.is.Positive. =="No"]) * -1
IGZ_latest_Fife <- select(IGZ_latest_Fife, c(-CPPMean, -Differences, -StdDev, -ZScore))
# Create CPP Scores & Typology Scores for the change from start to finish year--------------
# calculate overall zscore for change
IGZ_change <- filter(IGZdta, Year %in% c(StrtYear,RcntYear))
# need to group by CPP + IGZ here to stop IGZ's with the same name getting included in the wrong group
IGZ_change <- ddply(
IGZ_change,.
(InterZone, Indicator),
transform,
Change = (last(value) / first(value) -1)
)
# If high is bad multiply Z score by minus 1 to ensure direction is the same for all indicators
IGZ_change$Change[IGZ_change$High.is.Positive. == "No"] <-
(IGZ_change$Change[IGZ_change$High.is.Positive. == "No"]) * -1
# Filtering data so that change value is only included once per IGZ
IGZ_change <- filter(IGZ_change, Year == RcntYear)
IGZ_change <- ddply(
IGZ_change,.
(Indicator),
transform,
OverallMean = (mean(Change))
)
IGZ_change$Differences <- IGZ_change$Change - IGZ_change$OverallMean
IGZ_change <- ddply(
IGZ_change,.
(Indicator),
transform,
StdDev = (sd(Change))
)
IGZ_change$OverallZScore <- IGZ_change$Differences / IGZ_change$StdDev
IGZ_change <- select(IGZ_change, c(-OverallMean, -Differences, -StdDev))
# Calculate CPP Change Score
IGZ_change <- ddply(
IGZ_change,.
(CPP, Indicator),
transform,
CPPMean = (mean(OverallZScore))
)
IGZ_change$Differences <- IGZ_change$OverallZScore - IGZ_change$CPPMean
IGZ_change <- ddply(
IGZ_change,.
(CPP, Indicator),
transform,
StdDev = (sd(OverallZScore))
)
IGZ_change$CPPChangeScore <- IGZ_change$Differences / IGZ_change$StdDev
IGZ_change <- select(IGZ_change, c(-CPPMean, -Differences, -StdDev))
# Calculate FIFE SA Change Scores
IGZ_change_Fife <- IGZ_change %>% filter(CPP == "Fife") %>%
left_join(fife_sa[c(1,4)], by = c(InterZone = "AreaCode"))
IGZ_change_Fife <- ddply(
IGZ_change_Fife,.
(`Strategic Area`, Indicator),
transform,
CPPMean = (mean(OverallZScore))
)
IGZ_change_Fife$Differences <- IGZ_change_Fife$OverallZScore - IGZ_change_Fife$CPPMean
IGZ_change_Fife <- ddply(
IGZ_change_Fife,.
(Strategic.Area, Indicator),
transform,
StdDev = (sd(OverallZScore))
)
IGZ_change_Fife$CPPChangeScore <- IGZ_change_Fife$Differences / IGZ_change_Fife$StdDev
IGZ_change_Fife <- select(IGZ_change_Fife, c(-CPPMean, -Differences, -StdDev))
# Calculate Typology Change Score
IGZ_change <- ddply(
IGZ_change,.
(Typology_Group, Indicator),
transform,
TypeMean = (mean(OverallZScore))
)
IGZ_change$Differences <- IGZ_change$OverallZScore - IGZ_change$TypeMean
IGZ_change <- ddply(
IGZ_change,.
(Typology_Group, Indicator),
transform,
StdDev = (sd(OverallZScore))
)
IGZ_change$TypeChangeScore <- IGZ_change$Differences / IGZ_change$StdDev
IGZ_change <- select(IGZ_change, c(-TypeMean, -Differences, -StdDev))
# Add Z score column to SpPolysDF to allow ranking in this DataFrame ---------------------
decs <- c()
# need to group by CPP + IGZ here to stop IGZ's with the same name getting included in the wrong group
decs <- ddply(IGZ_latest,.(InterZone, CPP), summarise, combCPP = sum(CPPScore)) %>%
ddply(., .(CPP), mutate, CPPDec = ntile(combCPP, n = 7)) %>%
ddply(.,.(CPP), mutate, CPPRank = frank(combCPP)) %>%
select(InterZone, CPPDec,CPPRank)
SpPolysIZ@data <- left_join(SpPolysIZ@data, decs, by = "InterZone") %>% select(-rank_decs, -`rank-min`)
names(SpPolysIZ@data)[c(13,14)] <- c("rank_decs", "rank-min")
saveRDS(SpPolysDF, "data/Shapes_decs.rds")
saveRDS(SpPolysIZ, "data/IZshapes_decs.rds")
write_csv(IGZ_change,"data/IGZ_change.csv")
write_csv(IGZ_latest,"data/IGZ_latest.csv")
#tidy and write Fife files
IGZ_latest_Fife <- IGZ_latest_Fife %>% select(c(`Strategic Area`, InterZone, CPPScore, Indicator)) %>% rename(SAScore = CPPScore)
IGZ_change_Fife <- IGZ_change_Fife %>% select(c(`Strategic.Area`, InterZone, CPPChangeScore, Indicator)) %>% rename(SAChangeScore = CPPChangeScore)
write_csv(IGZ_latest_Fife,"data/IGZ_latest_Fife.csv")
write_csv(IGZ_change_Fife,"data/IGZ_change_Fife.csv")
# Compute CPP improvement rates and store data for plots on CPP over time page--------
CPP_dta_current <- CPPdta %>%
mutate(LineType = paste(CPP, Type)) %>%
filter(Type != "Projected")
CPP_dta_current <- setDT(CPP_dta_current)[, Improvement_Rate :=
(last(value) / first(value) -1) * 100,
by = list(CPP, Indicator)
]
CPP_dta_current <- CPP_dta_current %>% mutate(`High is Positive?` = "Yes")
CPP_dta_current$`High is Positive?`[CPP_dta_current$Indicator %in% c("Dwelling Fires",
"Unplanned Hospital Attendances",
"Fuel Poverty", "Fragility",
"Carbon Emissions", "Child Poverty",
"Out of Work Benefits", "Crime Rate",
"Emergency Admissions",
"Early Mortality")
] <- "No"
write_csv(CPP_dta_current, "data/Imp_rate_CPP.csv")
##This code creates a tidy table for the "Vulnerable Communities" page
##Note that the IGZs selected as most vulnerable do not change, but they can be updated to reflect
##new data using the "Calculate most vuln communities.R" code
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
library(plyr) ###plyr causes issues if loaded after dplyr, so I recommend restarting before running
library(tidyverse)
###NO LONGER NEED THIS AS GOING TO SELECT EARLIEST AND MOST RECENT YEAR
#StartYear <- "2006/07"
#EndYear <- "2017/18"
CPPNames <- c("Aberdeen City", "Aberdeenshire", "Angus", "Argyll and Bute",
"Clackmannanshire", "Dumfries and Galloway","Dundee City",
"East Ayrshire", "East Dunbartonshire", "East Lothian",
"East Renfrewshire", "Edinburgh, City of", "Eilean Siar",
"Falkirk", "Fife", "Glasgow City", "Highland",
"Inverclyde", "Midlothian", "North Ayrshire",
"North Lanarkshire", "Orkney Islands", "Perth and Kinross",
"Renfrewshire", "Scottish Borders",
"Shetland Islands", "South Ayrshire", "South Lanarkshire",
"Stirling", "West Dunbartonshire", "West Lothian")
IGZData <- read_csv("data/IGZcleandata.csv")[c(1:9)]
colnames(IGZData)[1] <- "IGZ"
IGZData <- filter(IGZData, Type == "Raw data") %>% select(-Type)
##get labels for later
Labels <- IGZData[1:2]
IGZData <- IGZData[-2]
#Calculations for table 1 --------------------------------------------------------------------------------
#Calculate Change and CPPChangeScore----------------------------------------------------------------------
IGZData$HighIsPos <- "No"
IGZData$HighIsPos[IGZData$Indicator %in% c("Attainment", "Participation Rate")] <- "Yes"
#Calculate %Change from First year to last year
TableData <- IGZData %>%
group_by(Indicator) %>%
filter(Year %in% c(first(Year), last(Year)))
yrs1 <- TableData %>% group_by(Indicator) %>%filter(Year == first(Year)) %>% mutate(YearRef = 1)
yrs2 <- TableData %>% group_by(Indicator) %>%filter(Year == last(Year)) %>% mutate(YearRef = 2)
TableData <-rbind(yrs1,yrs2)
TableData <- TableData[order(TableData$YearRef),]
TableData[,7] <- round(TableData[,7],2)
TableData <- ddply(
TableData,.
(IGZ, Indicator),
transform,
Change = ((last(value) / first(value))-1)*100
)
#Calculate the average of this change for each CPP
TableData <- ddply(
TableData,.
(CPP, Indicator, YearRef),
transform,
CPPChangeMean = mean(Change)
)
#Calculate the Standard Deviation of this change for each CPP
TableData <- ddply(
TableData,.
(CPP, Indicator, YearRef),
transform,
CPPChangeSD = sd(Change)
)
#Calculate difference between change value and CPPChangeMean
TableData$Differences <- TableData$Change - TableData$CPPChangeMean
#Calculate CPPChangeScore (z score - describes how many standard deviations the change in value is from the
#mean change in value for that CPP)
TableData$CPPChangeScore <- TableData$Differences / TableData$CPPChangeSD
#Remove columns which are not needed, CPPChangeMean, CPPChangeSD & Differences
TableData <- TableData[,-c(11:13)]
#Multiply change values and scores by -1 so that improvement is always measured by a positive value
TableData$CPPChangeScore[TableData$HighIsPos == "No"] <- TableData$CPPChangeScore[TableData$HighIsPos == "No"]*-1
#Calculate combined change and type scores, combining the individual scores for each outcome
TableData <- ddply(
TableData,.
(IGZ, YearRef),
transform,
CombinedCPPChangeScore = sum(CPPChangeScore)
)
#Remove columns which are not needed, CPPChangeScore, TypeChangeScore
TableData <- TableData[,-11]
#Add in CPP average of values to include as bottom row
TableData <- ddply(
TableData,.
(CPP, Indicator, YearRef),
transform,
CPPAverage = mean(value)
)
#Format data table-----------------------------------------------------------------------------------------
#Separate out change values only and format
ChangeData <- TableData[,c(1,2,3,4,5,6,8,9,10,12)]
ChangeData$Label <- "Change"
#Calculate change over time in CPP average
ChangeData <- ddply(
ChangeData,.
(IGZ, Indicator),
transform,
ChangeAv = ((last(CPPAverage) / first(CPPAverage))-1)*100
)
#Multiply change values and scores by -1 so that improvement is always measured by a positive value
ChangeData$Change[ChangeData$HighIsPos == "No"] <- ChangeData$Change[ChangeData$HighIsPos == "No"]*-1
ChangeData$ChangeAv[ChangeData$HighIsPos == "No"] <- ChangeData$ChangeAv[ChangeData$HighIsPos == "No"]*-1
ChangeData$Rate <- ifelse(
ChangeData$Change > ChangeData$ChangeAv,
"Faster",
ifelse(
ChangeData$Change < ChangeData$ChangeAv,
"Slower",
"No Difference"
)
)
ChangeData <- ChangeData[,-c(7,8,9,10,12)]
ChangeData <- ChangeData %>% unite(DataSpec, Indicator, Year, Label, sep = "_")
ChangeData <- ChangeData %>% spread(DataSpec, Rate)
#Only want to keep 1 year for each outcome so remove first year values
ChangeData <- ChangeData[,-c(5,7,9,11,13,15,17,19)]
#Separate out CPPScores only and format
CPPScoreData <- TableData[,c(1,2,3,4,5,6,8,9,11)]
CPPScoreData$Label <- "CPPScore"
CPPScoreData <- CPPScoreData %>% unite(DataSpec, Indicator, Year, Label, sep = "_")
CPPScoreData <- CPPScoreData[,-c(6:7)]
CPPScoreData <- CPPScoreData %>% spread(DataSpec, CombinedCPPChangeScore)
CPPScoreData[,c(5:20)] <- ifelse(
CPPScoreData[,c(5:20)] > 0,
"Faster",
ifelse(
CPPScoreData[,c(5:20)] == 0,
"No Difference",
"Slower"
)
)
#Only want to keep 1 column, as the score should be the same for every year and every outcome
CPPScoreData <- CPPScoreData[,-c(5:19)]
#Seperate out CPPMean only and format
CPPMeanData <- TableData[,c(1,2,3,4,5,6,8,9,12)]
CPPMeanData$Label <- "CPPMean"
CPPMeanData <- CPPMeanData %>% unite(DataSpec, Indicator, Year, sep = "_")
CPPMeanData <- CPPMeanData[,-c(6:7)]
CPPMeanData <- CPPMeanData %>% spread(DataSpec, CPPAverage)
#Format TableData
TableData <- TableData[,c(1:7)]
TableData$Label <- "Value"
TableData <- TableData %>% unite(DataSpec, Indicator, Year, sep = "_")
TableData <- TableData %>% spread(DataSpec, value)
#Add Vulnerable Community Labels to data and filter to only include these
DeprivedCommData <- read_csv("data/Vulnerable communities - 2006.csv")
DeprivedCommData <- filter(DeprivedCommData, revRank %in% 1:5) %>%
select(c("InterZone", "revRank"))
colnames(DeprivedCommData) <- c("IGZ","Most_Deprived_Comm")
DeprivedCommData <- DeprivedCommData[c(2,1)]
TableData <- merge(TableData, DeprivedCommData)
TableData <- TableData[order(TableData$Most_Deprived_Comm),]
CPPMeanData <- merge(CPPMeanData, DeprivedCommData)
CPPMeanData <- filter(CPPMeanData, `Most_Deprived_Comm` == 1)
total <- rbind(TableData, CPPMeanData)
total <- merge(total, ChangeData)
total <- merge(total, CPPScoreData)
total <- unique(left_join(total, Labels, by = "IGZ"))
total <- total[,-c(1,3,4)]
#relabel in correct format, format in correct order and round values
total$Most_Deprived_Comm[total$Label == "CPPMean"] <- "6"
total <- total[order(total$Most_Deprived_Comm),]
total$Most_Deprived_Comm[total$Most_Deprived_Comm == 1] <- "1st Most Vulnerable"
total$Most_Deprived_Comm[total$Most_Deprived_Comm == 5] <- "5th Most Vulnerable"
total$AreaLabel <- total$CPP
total$AreaLabel[total$Label == "CPPMean"] <- "CPP Average"
total <- total[,-2]
total <- total[,c(29,1,18,27,4,5,20,6,7,21,8,9,22,10,11,23,12,13,24,14,15,25,16,17,26,2,3,19,28)]
total[,c(5,6,11,12,20,21,23,24,26,27)] <- round(total[,c(5,6,11,12,20,21,23,24,26,27)],2)
total[,c(8,9,14,15,17,18)] <- round(total[,c(8,9,14,15,17,18)],0)
total[total$AreaLabel =="CPP Average","InterZone_Name"] <- paste(total[total$AreaLabel =="CPP Average","CPP"], "Average")
#Save formatted data
write_excel_csv(total, file = "data/Formatted Vulnerable Communities.csv")
###Calculate the rankings and deciles for income deprivation
#Run this before PrepDecileData.R
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
library(readxl)
library(dplyr)
library(readr)
detach(package:plyr)
##read dataset
DZdta <- read_excel("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP/data/DZ & IGZ income data.xlsx", sheet = 2)
IGZdta <- read_excel("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP/data/DZ & IGZ income data.xlsx", sheet = 3)[-4]
names(IGZdta)[5] <- "CPP"
##get count of dzs, get ranks and deciles
DZdta$count <- ave(DZdta$Council_area, DZdta$Council_area, FUN = length)
DZdta <- DZdta %>% group_by(Council_area) %>%
mutate(decl = ntile(`%`, 10))%>% ungroup()
#just double check IGZ ranks
IGZdta$count <- ave(IGZdta$CPP, IGZdta$CPP, FUN = length)
IGZdta<- IGZdta %>% dplyr::group_by(CPP) %>%
mutate(decl = ntile(`% of income deprived`, 10)) %>% ungroup()
sum(IGZdta$`Decile with CPP` != IGZdta$decl)
#so 345 don't match - which ones?
noMatch <- IGZdta[IGZdta$decl != IGZdta$`Decile with CPP`,]
##most mismatches are to do with rounding, but some issues e.g. Eilean SIar has
#no decile 1 in previous figs, so will keep new deciles
##Need to make decile 9 for isles into decile 10 as they have below 10 IGZs
IGZdta[IGZdta$CPP== "Shetland Islands"&IGZdta$decl== 7, "decl"] <- 10
IGZdta[IGZdta$CPP== "Orkney Islands"&IGZdta$decl== 6, "decl"] <- 10
IGZdta[IGZdta$CPP== "Eilean Siar"&IGZdta$decl== 9, "decl"] <- 10
##Get Scotland deciles
IGZdta <- IGZdta %>% mutate(ScotDecl = ntile(`% of income deprived`, 10))
DZdta <- DZdta %>% mutate(ScotDecl = ntile(`%`,10))
IGZDeciles <- IGZdta[c(1,5,10,11)]
colnames(IGZDeciles) <- c("AreaCode", "CouncilName", "Decile", "ScotlandDec")
DZDeciles <- DZdta[c(1,2,8,9)]
colnames(DZDeciles) <- c("AreaCode", "CouncilName", "Decile", "ScotlandDec")
#merge and save
allDta <- rbind(IGZDeciles[1:4],DZDeciles[1:4])
write_csv(allDta, "C:/Users/cassidy.nicholas/OneDrive - IS/CPOP/data/IncomeDeciles.csv")
##prepare inequality data for table
##Run this after Create income deciles.R
library(tidyverse)
setwd("C:/Users/cassidy.nicholas/OneDrive - IS/CPOP")
decDta <- read_csv("data/IncomeDeciles.csv")
IGZdta <- read_csv("data/IGZcleandata.csv")
IGZdta <- left_join(IGZdta, decDta, by = c("InterZone" = "AreaCode"))
##Make imputed data NAs and then remove them - in case
# we change our mind about how to display this in future
IGZdta[IGZdta$Type=="Imputed"|IGZdta$Type=="Projected", "value"] <- NA
IGZdta <- IGZdta[complete.cases(IGZdta$value),]
scotvals <- filter(IGZdta, ScotlandDec %in% c(1,10)) %>%
group_by(ScotlandDec, Indicator, Year) %>%
summarise_at(vars(value), list(mean))
scotvals$CouncilName <- "Scotland"
names(scotvals)[1] <- "Decile"
dd <- IGZdta[c(12,11,6,8:10)] %>% filter(Decile %in% c(1,10))%>%
group_by(Decile,Year,CouncilName,Indicator) %>%
summarise_at(vars(value), list(mean))
dd <- bind_rows(dd[1:5], scotvals[1:5])
##Previously we wanted to keep every year, but no longer! - get last year
#dd <- filter(dd, Year %in% c("2006/07", "2007/08", "2008/09", "2009/10",
# "2010/11", "2011/12", "2012/13",
# "2013/14", "2014/15", "2015/16", "2016/17", "2017/18"))
dd <- dd %>% dplyr::arrange(Year) %>% group_by(Indicator)%>%
filter(Year ==last(Year))
dd[dd$Indicator == "Child Poverty",4] <- "Child Poverty (%)"
dd[dd$Indicator == "Crime Rate",4] <- "Crime Rate, per 10,000"
dd[dd$Indicator == "Early Mortality",4] <- "Early Mortality, per 100,000"
dd[dd$Indicator == "Emergency Admissions",4] <- "Emergency Admissions, per 100,000"
dd[dd$Indicator == "Out of Work Benefits",4] <- "Out of Work Benefits (%)"
dd[dd$Indicator == "Participation",4] <- "Participation Rate (%)"
dd[dd$Indicator == "Depopulation",4] <- "Depopulation Index"
dd[dd$Indicator == "Attainment",4] <- "Average Highest Attainment"
saveRDS(dd, "data/DecileData.rds")
shiny::runApp()