rm(list = ls(all.names = TRUE))
graphics.off()
try(cat("\014"), silent = TRUE)
invisible(gc())
# =========================================================
# 6.4.2 – DEMOGRAFICKÁ POPTÁVKA A NABÍDKA
# ---------------------------------------------------------
# SPRÁVNÁ LOGIKA:
#
# POPULACE:
# - soubor: 130181-25data2024.csv
# - uzemi_txt = název ORP
# - uzemi_typ = "správní obvod obce s rozšířenou působností"
# - pohlavi_txt = muž / žena
# - vek_txt = přesně 0 až 18
# - hodnota = počet osob
#
# ORDINACE:
# - soubor: Otevrena-data-NR-01-06-nrpzs-mi.xlsx
# - ZZ_obec = název ORP pro join
# - poskytovatel_ico = validní řádek ordinace
# - 1 řádek = 1 ordinace
#
# JOIN:
# - CSV : uzemi_txt
# - XLSX: ZZ_obec
#
# UKAZATEL:
# - počet osob 0–18 / počet ordinací
# =========================================================
# =========================
# 0) Balíčky
# =========================
needed_packages <- c(
"readxl", "readr", "dplyr", "stringr",
"ggplot2", "scales", "openxlsx", "tibble"
)
to_install <- needed_packages[!needed_packages %in% rownames(installed.packages())]
if (length(to_install) > 0) install.packages(to_install)
suppressPackageStartupMessages({
library(readxl)
library(readr)
library(dplyr)
library(stringr)
library(ggplot2)
library(scales)
library(openxlsx)
library(tibble)
})
options(readxl.show_progress = FALSE)
# =========================
# 1) Cesty
# =========================
pop_file       <- "C:/Users/karel/Desktop/struktura-obyvatelstva/130181-25data2024.csv"
ordinance_file <- "C:/Users/karel/Desktop/mista-poskytovani-zdravotnich-sluzeb/Otevrena-data-NR-01-06-nrpzs-mi.xlsx"
output_dir     <- "C:/Users/karel/Desktop/6.4.2"
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# =========================
# 2) Pomocné funkce
# =========================
to_num <- function(x) {
x <- as.character(x)
x <- gsub("\u00A0", " ", x, fixed = TRUE)
x <- gsub("\\s+", "", x)
x <- gsub(",", ".", x, fixed = TRUE)
suppressWarnings(as.numeric(x))
}
fmt_int <- function(x) {
scales::number(x, accuracy = 1, big.mark = " ", decimal.mark = ",")
}
fmt_dec1 <- function(x) {
scales::number(x, accuracy = 0.1, big.mark = " ", decimal.mark = ",")
}
fmt_dec6 <- function(x) {
scales::number(x, accuracy = 0.000001, big.mark = " ", decimal.mark = ",")
}
norm_text <- function(x) {
x <- ifelse(is.na(x), "", as.character(x))
x <- stringr::str_squish(x)
x <- iconv(x, from = "", to = "ASCII//TRANSLIT")
x <- tolower(x)
x <- gsub("[–—−]", "-", x)
x <- gsub("\\s+", " ", x)
trimws(x)
}
norm_key <- function(x) {
x <- norm_text(x)
x <- gsub("[^a-z0-9]+", " ", x)
x <- gsub("\\s+", " ", x)
trimws(x)
}
# =========================
# 3) Kontrola vstupů
# =========================
if (!file.exists(pop_file)) stop("Chybí populační soubor: ", pop_file)
if (!file.exists(ordinance_file)) stop("Chybí soubor ordinací: ", ordinance_file)
# =========================
# 4) Ordinace 2024
# =========================
ordinance_sheets <- excel_sheets(ordinance_file)
ordinance_sheet <- if ("Master - komplet" %in% ordinance_sheets) "Master - komplet" else ordinance_sheets[1]
ord_raw <- read_excel(
ordinance_file,
sheet = ordinance_sheet,
.name_repair = "minimal"
)
required_ord_cols <- c("ZZ_obec", "poskytovatel_ico")
missing_ord_cols <- setdiff(required_ord_cols, names(ord_raw))
if (length(missing_ord_cols) > 0) {
cat("\nNázvy sloupců v XLSX:\n")
print(names(ord_raw))
stop("V XLSX chybí sloupce: ", paste(missing_ord_cols, collapse = ", "))
}
ord_base <- ord_raw %>%
transmute(
row_id = row_number(),
orp_xlsx = as.character(ZZ_obec),
poskytovatel_ico = as.character(poskytovatel_ico),
orp_clean = str_squish(ZZ_obec),
orp_key = norm_key(ZZ_obec),
ico_clean = str_squish(poskytovatel_ico)
) %>%
filter(
!is.na(ico_clean),
ico_clean != "",
orp_key != ""
)
if (nrow(ord_base) == 0) {
stop("Po načtení ordinací nezůstal žádný záznam.")
}
ord_orp <- ord_base %>%
group_by(orp_clean, orp_key) %>%
summarise(
pocet_ordinaci = n(),
.groups = "drop"
) %>%
arrange(desc(pocet_ordinaci), orp_clean)
# =========================
# 5) Populační data 2024 – POUZE ORP
# =========================
pop_raw <- read_csv(
pop_file,
show_col_types = FALSE,
locale = locale(encoding = "UTF-8")
)
required_pop_cols <- c("hodnota", "pohlavi_txt", "vek_txt", "uzemi_txt", "uzemi_typ", "obdobi")
missing_pop_cols <- setdiff(required_pop_cols, names(pop_raw))
if (length(missing_pop_cols) > 0) {
cat("\nNázvy sloupců v CSV:\n")
print(names(pop_raw))
stop("V CSV chybí sloupce: ", paste(missing_pop_cols, collapse = ", "))
}
pop_prepared <- pop_raw %>%
transmute(
hodnota_num = to_num(hodnota),
pohlavi_txt = as.character(pohlavi_txt),
vek_txt = as.character(vek_txt),
uzemi_txt = as.character(uzemi_txt),
uzemi_typ = as.character(uzemi_typ),
obdobi = as.character(obdobi),
orp_clean = str_squish(uzemi_txt),
orp_key = norm_key(uzemi_txt),
pohlavi_key = norm_key(pohlavi_txt),
vek_clean = str_squish(vek_txt)
) %>%
filter(
uzemi_typ == "správní obvod obce s rozšířenou působností",
obdobi == "2024-12-31",
pohlavi_key %in% c("muz", "zena"),
vek_clean %in% as.character(0:18),
orp_key != ""
)
if (nrow(pop_prepared) == 0) {
stop("Po filtraci populačních dat nezůstal žádný záznam.")
}
pop_orp_age_sex <- pop_prepared %>%
group_by(orp_clean, orp_key, pohlavi_key, vek_clean) %>%
summarise(
osoby = sum(hodnota_num, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(orp_clean, pohlavi_key, suppressWarnings(as.integer(vek_clean)))
pop_orp_age <- pop_orp_age_sex %>%
group_by(orp_clean, orp_key, vek_clean) %>%
summarise(
osoby = sum(osoby, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(orp_clean, suppressWarnings(as.integer(vek_clean)))
pop_orp_total <- pop_orp_age %>%
group_by(orp_clean, orp_key) %>%
summarise(
deti_0_18_2024 = sum(osoby, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(deti_0_18_2024), orp_clean)
# =========================
# 6) Finální analýza
# =========================
analysis_orp <- ord_orp %>%
inner_join(
pop_orp_total,
by = "orp_key",
suffix = c("_ord", "_pop")
) %>%
transmute(
orp = orp_clean_ord,
orp_pop = orp_clean_pop,
deti_0_18_2024 = deti_0_18_2024,
pocet_ordinaci = pocet_ordinaci,
deti_na_ordinaci = deti_0_18_2024 / pocet_ordinaci
) %>%
arrange(desc(deti_na_ordinaci), desc(deti_0_18_2024), desc(pocet_ordinaci), orp)
if (nrow(analysis_orp) == 0) {
stop("Po spojení populačních dat a ordinací nezůstal žádný záznam.")
}
# =========================
# 7) Diagnostika
# =========================
ord_unmatched <- ord_orp %>%
anti_join(pop_orp_total, by = "orp_key") %>%
arrange(orp_clean)
pop_without_ordinance <- pop_orp_total %>%
anti_join(ord_orp, by = "orp_key") %>%
arrange(desc(deti_0_18_2024), orp_clean)
summary_stats <- tibble(
ukazatel = c(
"ORP v populačních datech",
"ORP s ordinací v NR-01-06",
"ORP v průniku",
"Celkový počet ordinací v analyzovaných ORP",
"Počet ordinací bez napojení na populační data",
"Počet ORP bez ordinace"
),
hodnota = c(
nrow(pop_orp_total),
nrow(ord_orp),
nrow(analysis_orp),
sum(analysis_orp$pocet_ordinaci, na.rm = TRUE),
sum(ord_unmatched$pocet_ordinaci, na.rm = TRUE),
nrow(pop_without_ordinance)
)
)
# =========================
# 8) Kontrolní jednotky
# =========================
kontrola_ocekavana <- tibble::tribble(
~orp,               ~deti_expected, ~ordinace_expected, ~ukazatel_expected,
"Černošice",         40964,          1,                  40964,
"Šlapanice",         17843,          2,                  8921.5,
"Lysá nad Labem",     7927,          3,                  7927 / 3,
"Ústí nad Labem",    23928,         14,                  23928 / 14,
"Broumov",            2910,          2,                  1455,
"Děčín",             14509,         10,                  14509 / 10
)
kontrola_vysledku <- analysis_orp %>%
filter(orp %in% kontrola_ocekavana$orp) %>%
select(orp, deti_0_18_2024, pocet_ordinaci, deti_na_ordinaci) %>%
left_join(kontrola_ocekavana, by = "orp") %>%
mutate(
deti_ok = deti_0_18_2024 == deti_expected,
ordinace_ok = pocet_ordinaci == ordinace_expected,
ukazatel_ok = abs(deti_na_ordinaci - ukazatel_expected) < 1e-8
) %>%
arrange(match(orp, kontrola_ocekavana$orp))
# =========================
# 9) TOP 15 nejhorších
# =========================
top_n <- min(15, nrow(analysis_orp))
top15 <- analysis_orp %>%
slice_head(n = top_n) %>%
mutate(
orp = factor(orp, levels = rev(orp)),
label_hodnota = fmt_dec1(deti_na_ordinaci)
)
tabulka_top15 <- analysis_orp %>%
slice_head(n = top_n) %>%
transmute(
`Správní obvod ORP` = orp,
`Počet osob 0–18 let` = fmt_int(deti_0_18_2024),
`Počet ordinací` = fmt_int(pocet_ordinaci),
`Počet osob 0–18 let na 1 ordinaci` = fmt_dec1(deti_na_ordinaci)
)
# =========================
# 10) Graf
# =========================
plot_top15 <- ggplot(top15, aes(x = deti_na_ordinaci, y = orp)) +
geom_col(fill = "#4F86B7") +
geom_text(
aes(label = label_hodnota),
hjust = -0.1,
size = 4
) +
scale_x_continuous(
labels = label_number(big.mark = " ", decimal.mark = ","),
expand = expansion(mult = c(0, 0.14))
) +
labs(
title = "TOP 15 nejzatíženějších ORP v roce 2024",
subtitle = "Počet osob ve věku 0–18 let na 1 ordinaci",
x = "Počet osob 0–18 let na 1 ordinaci",
y = NULL
) +
theme_minimal(base_size = 13) +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold", size = 19),
plot.subtitle = element_text(size = 11)
)
ggsave(
filename = file.path(output_dir, "6.4.1_top15_orp_graf.png"),
plot = plot_top15,
width = 13,
height = 8.5,
dpi = 300,
bg = "white"
)
# =========================
# 11) Uložení dat
# =========================
write_csv(pop_orp_age_sex, file.path(output_dir, "6.4.1_orp_populace_detail_vek_pohlavi_2024.csv"))
write_csv(pop_orp_age, file.path(output_dir, "6.4.1_orp_populace_detail_vek_2024.csv"))
write_csv(pop_orp_total, file.path(output_dir, "6.4.1_orp_populace_2024.csv"))
write_csv(ord_base, file.path(output_dir, "6.4.1_ordinace_vstup_vsechny_radky.csv"))
write_csv(ord_orp, file.path(output_dir, "6.4.1_ordinace_2024.csv"))
write_csv(analysis_orp, file.path(output_dir, "6.4.1_analyza_2024.csv"))
write_csv(top15 %>% mutate(orp = as.character(orp)), file.path(output_dir, "6.4.1_top15_orp_2024.csv"))
write_csv(tabulka_top15, file.path(output_dir, "6.4.1_top15_orp_tabulka.csv"))
write_csv(ord_unmatched, file.path(output_dir, "6.4.1_ordinace_bez_napojeni_na_populaci.csv"))
write_csv(pop_without_ordinance, file.path(output_dir, "6.4.1_orp_bez_ordinace.csv"))
write_csv(summary_stats, file.path(output_dir, "6.4.1_souhrnne_pocty.csv"))
write_csv(kontrola_vysledku, file.path(output_dir, "6.4.1_kontrola_spravnosti.csv"))
# =========================
# 12) Excel
# =========================
wb <- createWorkbook()
addWorksheet(wb, "analyza_2024")
writeData(wb, "analyza_2024", analysis_orp)
addWorksheet(wb, "top15")
writeData(wb, "top15", top15 %>% mutate(orp = as.character(orp)))
addWorksheet(wb, "tabulka_top15")
writeData(wb, "tabulka_top15", tabulka_top15)
addWorksheet(wb, "pop_detail_vek_pohlavi")
writeData(wb, "pop_detail_vek_pohlavi", pop_orp_age_sex)
addWorksheet(wb, "pop_detail_vek")
writeData(wb, "pop_detail_vek", pop_orp_age)
addWorksheet(wb, "populace_2024")
writeData(wb, "populace_2024", pop_orp_total)
addWorksheet(wb, "ordinace_vstup")
writeData(wb, "ordinace_vstup", ord_base)
addWorksheet(wb, "ordinace_2024")
writeData(wb, "ordinace_2024", ord_orp)
addWorksheet(wb, "souhrn")
writeData(wb, "souhrn", summary_stats)
addWorksheet(wb, "kontrola")
writeData(wb, "kontrola", kontrola_vysledku)
saveWorkbook(
wb,
file = file.path(output_dir, "6.4.1_analyza_2024.xlsx"),
overwrite = TRUE
)
# =========================
# 13) Konzole
# =========================
cat("\n=====================================================\n")
cat("ANALÝZA HOTOVO\n")
cat("Výstupní složka:", output_dir, "\n")
cat("=====================================================\n\n")
cat("Souhrnné počty:\n")
print(summary_stats, n = Inf)
cat("\nKONTROLNÍ JEDNOTKY:\n")
print(kontrola_vysledku, n = Inf)
cat("\nRuční kontrola vybraných jednotek:\n")
print(
analysis_orp %>%
filter(orp %in% c("Černošice", "Šlapanice", "Lysá nad Labem", "Ústí nad Labem", "Broumov", "Děčín")) %>%
select(orp, deti_0_18_2024, pocet_ordinaci, deti_na_ordinaci) %>%
arrange(desc(deti_na_ordinaci)),
n = Inf
)
cat("\nTOP 15 nejhorších jednotek:\n")
print(tabulka_top15, n = Inf)
