修訂 | 9bbf97f599906b991ce8420f8e7bb3cad3717394 (tree) |
---|---|
時間 | 2024-12-17 22:37:45 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Commiter | Lorenzo Isella |
I added a script to handle the various lists of SA numbers for financial aid to banks.
@@ -0,0 +1,184 @@ | ||
1 | +rm(list=ls()) | |
2 | + | |
3 | +library(tidyverse) | |
4 | +library(janitor) | |
5 | +library(openxlsx) | |
6 | + | |
7 | +source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") | |
8 | + | |
9 | + | |
10 | + | |
11 | +positive_decisions <- c("Article 4(3) - decision not to raise objections", "Article 7(3) - positive decision" ) | |
12 | + | |
13 | + | |
14 | + | |
15 | +df_ini <- read_csv("../input/cases_to_filter_complete.csv") | |
16 | + | |
17 | +df <- df_ini |> | |
18 | + clean_data() |> | |
19 | + all_to_lower() | |
20 | + | |
21 | +df_out <- df |> | |
22 | + filter(banking_aid_yes_no=="yes") |> | |
23 | + distinct(case_reference, .keep_all=T) | |
24 | + | |
25 | +save_excel(df_out,"../output/case_list_provisional.xlsx") | |
26 | + | |
27 | + | |
28 | +df_ec <- read_csv("../input/SA-Case_Full_Extract_Case_EC_2024-11-1511-06-55.csv.gz") |> | |
29 | + clean_names() | |
30 | + | |
31 | + | |
32 | +df_out2 <- df_out |> | |
33 | + mutate(case_reference=toupper(case_reference)) | |
34 | + | |
35 | +df_ec_out <- df_ec |> | |
36 | + filter(case_reference %in% df_out2$case_reference) |> | |
37 | + group_by(case_reference, case_title) |> | |
38 | + summarise(primary_obj=paste(unique(primary_objective), collapse=("------")), | |
39 | + secondary_obj=paste(unique(secondary_objective), collapse=("------")), | |
40 | + aid_instr=paste(unique(aid_instrument), collapse=("------")), | |
41 | + MS=unique(member_state_long_name), | |
42 | + primary_legal_basis=paste(unique(primary_law), collapse=("------")), | |
43 | + secondary_legal_basis=paste(unique(secondary_law_in_force), collapse=("------")), | |
44 | + internal_qualifier=paste(unique(internal_qualifier_s), collapse=("------")) | |
45 | + | |
46 | + ) |> | |
47 | + ungroup() |> | |
48 | + arrange(case_reference) | |
49 | + | |
50 | + | |
51 | +save_excel(df_ec_out, "../output/list_extra_variables.xlsx") | |
52 | + | |
53 | + | |
54 | + | |
55 | + | |
56 | +df_fin <- read_csv("../input/YES_cases-Copy.csv") |> | |
57 | + mutate(case_reference=toupper(case_reference)) | |
58 | + | |
59 | + | |
60 | +extra_cases <- setdiff(df_fin$case_reference, df_ec_out$case_reference) | |
61 | +extra_cases2 <- setdiff(df_ec_out$case_reference,df_fin$case_reference ) | |
62 | + | |
63 | + | |
64 | + | |
65 | +df_rod_approuved <- read_csv("../input/20241212_Internal_qualifier_banking_aid-approved.csv") |> | |
66 | + clean_data() | |
67 | + | |
68 | +df_rod_dupes <- df_rod_approuved |> | |
69 | + get_dupes_short(sa_case) | |
70 | + | |
71 | +save_excel(df_rod_dupes, "../output/state_aid_approved_database_duplicated.xlsx") | |
72 | + | |
73 | + | |
74 | + | |
75 | +df_not <- df_rod_approuved |> | |
76 | + filter(str_starts(in_lorenzos_extraction,"Not" )) |> | |
77 | + mutate(sa_case=remove_trailing_spaces(sa_case)) |> | |
78 | + mutate(sa_case=str_replace(sa_case, " ", "")) | |
79 | + | |
80 | +df_not2 <- setdiff(df_rod_approuved$sa_case, df_fin$case_reference) | |
81 | + | |
82 | +df_sa_present <- df_fin |> | |
83 | + filter(case_reference %in% df_not$sa_case) | |
84 | + | |
85 | +save_excel(df_sa_present, "../output/cases_present_initial_list_lorenzo.xlsx") | |
86 | + | |
87 | +df_ec_not <- df_ec |> | |
88 | + filter(case_reference %in% df_not$sa_case) | |
89 | + | |
90 | + | |
91 | +df_ec_not_real <- df_ec |> | |
92 | + filter(case_reference %in% df_not2) | |
93 | + | |
94 | + | |
95 | + | |
96 | +df_ec_not_article <- df_ec_not |> | |
97 | + group_by(case_reference) |> | |
98 | + summarise(article=paste(unique(decision_type_article ), collapse="------")) |> | |
99 | + ungroup() |> | |
100 | + mutate(articles_positive_decision=if_else(article %in% positive_decisions, | |
101 | + "yes", "no")) | |
102 | + | |
103 | + | |
104 | +sa_missing <- setdiff(df_not$sa_case, df_ec$case_reference) | |
105 | + | |
106 | +df_ec_not_compact <- df_ec_not_real |> | |
107 | + group_by(case_reference, case_title) |> | |
108 | + summarise(article=paste(unique(decision_type_article ), collapse="------"), | |
109 | + primary_obj=paste(unique(primary_objective), collapse=("------")), | |
110 | + secondary_obj=paste(unique(secondary_objective), collapse=("------")), | |
111 | + aid_instr=paste(unique(aid_instrument), collapse=("------")), | |
112 | + MS=unique(member_state_long_name), | |
113 | + primary_legal_basis=paste(unique(primary_law), collapse=("------")), | |
114 | + secondary_legal_basis=paste(unique(secondary_law_in_force), collapse=("------")), | |
115 | + internal_qualifier=paste(unique(internal_qualifier_s), collapse=("------")) | |
116 | + | |
117 | + ) |> | |
118 | + ungroup() |> | |
119 | + arrange(case_reference) |> | |
120 | + mutate(articles_positive_decision=if_else(article %in% positive_decisions, | |
121 | + "yes", "no")) |> | |
122 | + mutate(new_objective=paste(primary_obj, secondary_obj, sep=" ")) |> | |
123 | + mutate(is_remedy = if_else(str_detect(new_objective, "Remedy for a serious disturbance in the economy"), "yes", "no" )) |> | |
124 | + mutate(is_temporary_framework=if_else(str_detect(internal_qualifier, "Temporary Framework"), "yes", "no")) |> | |
125 | + mutate(is_financial_crisis=if_else(str_detect(internal_qualifier, "Financial crisis case"), "yes", "no")) |> | |
126 | + mutate(is_UK=if_else(MS=="United Kingdom", "yes", "no")) |> | |
127 | + mutate(is_prolongation=if_else(str_detect(case_title, "(?i)prolongation"), | |
128 | + "yes", "no")) | |
129 | + | |
130 | + | |
131 | + | |
132 | + | |
133 | + | |
134 | +df_ec_filtered <- df_ec_not_compact |> | |
135 | + filter(is_UK=="no", | |
136 | + articles_positive_decision=="yes") | |
137 | + | |
138 | + | |
139 | +save_excel(df_ec_filtered, "../output/cases_investigation.xlsx") | |
140 | + | |
141 | + | |
142 | + | |
143 | +tt <- "SA.103450" | |
144 | + | |
145 | +test <- df_ec_filtered |> | |
146 | + filter(case_reference==tt) | |
147 | + | |
148 | + | |
149 | +df_rod_fin <- df_ec_filtered |> | |
150 | + select(case_reference) |> | |
151 | + mutate(origin="Rodrigo") | |
152 | + | |
153 | +df_lor_fin <- df_fin|> | |
154 | + select(case_reference) |> | |
155 | + mutate(origin="Lorenzo") | |
156 | + | |
157 | + | |
158 | +df_fin_combined <- bind_rows(df_rod_fin, df_lor_fin) | |
159 | + | |
160 | +save_excel(df_fin_combined, "../output/combined_list_A3_D3.xlsx") | |
161 | + | |
162 | +df_fin_combined_expanded <- df_ec |> | |
163 | + filter(case_reference %in% df_fin_combined$case_reference) |> | |
164 | + group_by(case_reference, case_title) |> | |
165 | + summarise(primary_obj=paste(unique(primary_objective), collapse=("------")), | |
166 | + secondary_obj=paste(unique(secondary_objective), collapse=("------")), | |
167 | + aid_instr=paste(unique(aid_instrument), collapse=("------")), | |
168 | + MS=unique(member_state_long_name), | |
169 | + primary_legal_basis=paste(unique(primary_law), collapse=("------")), | |
170 | + secondary_legal_basis=paste(unique(secondary_law_in_force), collapse=("------")), | |
171 | + internal_qualifier=paste(unique(internal_qualifier_s), collapse=("------")) | |
172 | + | |
173 | + ) |> | |
174 | + ungroup() |> | |
175 | + arrange(case_reference) | |
176 | + | |
177 | + | |
178 | +df_out_exp <- df_fin_combined |> | |
179 | + left_join(y=df_fin_combined_expanded, by=c("case_reference")) | |
180 | + | |
181 | +save_excel(df_out_exp, "../output/combined_list_A3_D3_case_ec.xlsx") | |
182 | + | |
183 | + | |
184 | +print("So far so good") |