The CliRes data are in 2 excel files, each with many tabs. One of these files contains general information and the other one contains the surveillance data:
> general_data <- "https://www.dropbox.com/s/3hfnhd39fxjmcnh/20-5-2019-_18ZN_Category_V1_Data.xls?dl=0"
> surveillance_data <- "https://www.dropbox.com/s/4kk6qewmntw3q5o/20-5-2019-_18ZN_V1_Data.xls?dl=0"
Installing the required packages:
> required <- c("dplyr", "magrittr", "purrr", "readxl", "tibble", "tidyr")
> to_install <- setdiff(required, row.names(installed.packages()))
> if (length(to_install)) install.packages(to_install)
Loading magrittr
:
> library(magrittr)
Naming output of lapply()
:
> lapply2 <- function(X, FUN, ...) setNames(lapply(X, FUN, ...), X)
Tuning the download.file()
function in order to be able to download from a Dropbox link:
> download_dropbox <- function(url, ...) download.file(sub("dl=0", "raw=1", url), ...)
The following function returns the flocks with missing weeks:
> missing_weeks <- function(x) {
+ tmp <- x %>%
+ dplyr::group_by(USUBJID, FLOCKSEQUENCE) %>%
+ dplyr::arrange(WEEK) %>%
+ dplyr::summarise(a = length(unique(diff(WEEK)))) %>%
+ dplyr::ungroup() %>%
+ dplyr::filter(a > 1)
+ if (nrow(tmp)) return(purrr::map2(tmp$USUBJID,
+ tmp$FLOCKSEQUENCE,
+ function(farm, flock)
+ x %>%
+ dplyr::filter(USUBJID == farm, FLOCKSEQUENCE == flock) %>%
+ dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)))
+ message("There is no missing week.")
+ invisible(0)
+ }
The ESVAC conversion factors are available from the table 2 of page 62 of this OIE report. A copy-paste of this table without headers is available here. Let’s download this file:
> tmp <- tempfile()
> download.file("https://raw.githubusercontent.com/viparc/clires_data/master/raw_data/esvac.txt", tmp)
And read and reformat the data into a named numeric vector:
> (esvac <- tmp %>%
+ readLines() %>%
+ sub("^#.*", "", .) %>%
+ sub(" *", "", .) %>%
+ {.[. != ""]} %>%
+ matrix(ncol = 4, byrow = TRUE) %>%
+ as.data.frame(stringsAsFactors = FALSE) %>%
+ setNames(c("ab_vet_med", "ab_oie", "IU.mg", "mg.IU")) %>%
+ dplyr::filter(!grepl("methane", ab_vet_med)) %>%
+ dplyr::transmute(ab_oie = sub(" .*$", "", tolower(ab_oie)),
+ mg.IU = as.numeric(mg.IU)) %>%
+ dplyr::bind_rows(data.frame(ab_oie = "josamycin",
+ mg.IU = 0.001, stringsAsFactors = FALSE)) %$%
+ setNames(mg.IU, ab_oie))
bacitracin benzylpenicillin chlortetracycline colistin dihydrostreptomycin erythromycin gentamicin kanamycin neomycin neomycin oxytetracycline paromomycin
0.013514 0.000600 0.001111 0.000049 0.001220 0.001087 0.001613 0.001256 0.001325 0.001492 0.001149 0.001481
polymyxin rifamycin spiramycin streptomycin tobramycin tylosin tetracycline josamycin
0.000119 0.001127 0.000313 0.001274 0.001143 0.001000 0.001000 0.001000
Note that we got rid off one of the colistin data and that we manually added the data for josamycin that was absent from the table.
Downloading the general data file:
> tmp <- tempfile()
> download_dropbox(general_data, tmp)
Reformating and merging with the esvac
dataframe that we generated above:
> (drug_codes <- c("ANTIMICROBIAL", "ANTIMICROBIAL_GridAnti") %>%
+ lapply(readxl::read_excel, path = tmp) %>%
+ c(list(c("ANTIMICROBIAL_SEQ"))) %>%
+ do.call(dplyr::right_join, .) %>%
+ dplyr::select(CODE, ANTINAME1, CONCENTRATION, GRIDUNIT, GRIDPACK, GRIDPACKUNIT) %>%
+ dplyr::filter(! ANTINAME1 %in% c("alicin", "axit oxolinic", "iodo-hydroxyquinoline", "metronidazol", "nystatin")) %>%
+ dplyr::mutate(ANTINAME1 = dplyr::recode(ANTINAME1, sunfadimethoxine = "sulfadimethoxine",
+ sunphamethoxazole = "sulphamethoxazole"),
+ GRIDUNIT = dplyr::na_if(GRIDUNIT, "NA"),
+ GRIDUNIT = dplyr::na_if(GRIDUNIT, "na"),
+ GRIDUNIT = dplyr::recode(GRIDUNIT, UI = "IU", mg = "MG"),
+ GRIDPACKUNIT = dplyr::na_if(GRIDPACKUNIT, "na"),
+ GRIDPACKUNIT = dplyr::na_if(GRIDPACKUNIT, "VI"),
+ GRIDPACKUNIT = dplyr::recode(GRIDPACKUNIT, g = "G", ml = "G", ML = "G"),
+ GRIDPACK = dplyr::na_if(GRIDPACK, "na"),
+ GRIDPACK = as.numeric(GRIDPACK),
+ CONCENTRATION = dplyr::na_if(CONCENTRATION, "NA"),
+ CONCENTRATION = dplyr::na_if(CONCENTRATION, "na"),
+ CONCENTRATION = dplyr::recode(CONCENTRATION, S = "500"), # this will ultimately be corrected in CliRes CORRECTION
+ CONCENTRATION = as.numeric(CONCENTRATION),
+ CONCENTRATION = ifelse(GRIDUNIT == "IU", CONCENTRATION * esvac[ANTINAME1], CONCENTRATION),
+ GRIDUNIT = ifelse(GRIDUNIT == "IU", "MG", GRIDUNIT), # linked to the above line
+ CONCENTRATION = ifelse(GRIDUNIT == "MG", CONCENTRATION / 1000, CONCENTRATION),
+ GRIDUNIT = ifelse(GRIDUNIT == "MG", "G", GRIDUNIT), # linked to the above line
+ GRIDPACK = ifelse(GRIDPACKUNIT == "KG", 1000 * GRIDPACK, GRIDPACK),
+ GRIDPACKUNIT = ifelse(GRIDPACKUNIT == "KG", "G", GRIDPACKUNIT), # linked to the above line
+ proportion = CONCENTRATION / GRIDPACK) %>%
+ dplyr::select(CODE, ANTINAME1, proportion))
# A tibble: 461 x 3
CODE ANTINAME1 proportion
<chr> <chr> <dbl>
1 AB001 colistin 0.000735
2 AB001 oxytetracycline 0.015
3 AB002 oxytetracycline 0.07
4 AB002 colistin 0.0098
5 AB003 neomycin 0.0795
6 AB003 colistin 0.0147
7 AB004 oxytetracycline 0.01
8 AB005 gentamicin 0.06
9 AB005 colistin 0.0245
10 AB006 doxycycline 0.2
# … with 451 more rows
Downloading the surveillance data file:
> tmp <- tempfile()
> download_dropbox(surveillance_data, tmp)
For antimicrobial, disease and chicken data in the surveillance file, we need the following pairs of tabs respectively, that we have to merge:
> antimicrobial <- c("ANTIMICROBIAL", "ANTIMICROBIAL_GridEvent")
> disease <- c("DISEASE", "DISEASE_GridEvent")
> chicks <- c("MID_INOUT", "MID_INOUT_GridEvent")
Reading the data from file:
> (surveillance <- lapply2(c(antimicrobial, disease, chicks, "SAMPLE"),
+ readxl::read_excel, path = tmp))
$ANTIMICROBIAL
# A tibble: 5,567 x 11
EVENT USUBJID ANTIMICROBIAL_SEQ STUDYID SITEID FARMCODE FLOCKSEQUENCE WEEKNO ANTIBIOTICNO entry enteredtime
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dttm>
1 FLOCK 75-001 1 18ZN 75 001 01 1 1 2 2017-01-31 11:17:24
2 FLOCK 75-002 1 18ZN 75 002 01 1 1 2 2017-02-24 15:46:27
3 FLOCK 75-003 1 18ZN 75 003 01 1 0 2 2017-02-01 12:21:30
4 FLOCK 75-004 1 18ZN 75 004 01 1 1 2 2017-01-31 15:53:38
5 FLOCK 75-005 1 18ZN 75 005 01 1 2 2 2017-01-09 09:21:55
6 FLOCK 75-006 1 18ZN 75 006 01 1 1 2 2017-02-01 14:04:31
7 FLOCK 75-007 1 18ZN 75 007 01 1 1 2 2017-01-26 20:14:44
8 FLOCK 75-008 1 18ZN 75 008 01 1 1 2 2017-01-31 16:56:22
9 FLOCK 75-009 1 18ZN 75 009 01 1 1 2 2017-01-04 15:47:35
10 FLOCK 75-010 1 18ZN 75 010 01 1 2 2 2017-01-09 10:56:58
# … with 5,557 more rows
$ANTIMICROBIAL_GridEvent
# A tibble: 1,808 x 15
ANTIMICROBIAL_SEQ EVENT USUBJID ANTIMICROBIAL_GridEvent_SEQ CODE INITIAL AMOUTUSED PACKAGE PACKAGEUNIT COST ALLFLOCK CHCKENUSEDNO PREVENTION entry enteredtime
<dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <lgl> <dbl> <lgl> <dbl> <dttm>
1 1 FLOCK 75-001 1 AB001 COLITERRA 100 100 G 17000 TRUE NA TRUE 2 2017-01-31 11:17:24
2 1 FLOCK 75-002 1 AB001 COLITERRA 50 100 G 17000 TRUE NA TRUE 2 2017-02-24 15:46:27
3 1 FLOCK 75-004 1 AB001 COLITERRA 100 100 G 17000 TRUE NA TRUE 2 2017-01-31 15:53:38
4 1 FLOCK 75-005 1 AB001 Coliterra 100 100 G 25000 TRUE NA TRUE 2 2017-01-09 09:21:55
5 1 FLOCK 75-006 1 AB001 COLITERRA 100 100 G 25000 TRUE NA TRUE 2 2017-02-01 14:04:31
6 1 FLOCK 75-007 1 AB001 Coliterra 100 100 G 32000 TRUE NA TRUE 2 2017-01-26 20:14:44
7 1 FLOCK 75-008 1 AB001 COLITERRA 600 100 G 25000 TRUE NA TRUE 2 2017-01-31 16:56:22
8 1 FLOCK 75-009 1 AB001 Coliterra 50 100 G 35000 TRUE NA TRUE 2 2017-01-04 15:47:35
9 1 FLOCK 75-010 1 AB001 Coliterra 100 100 G 25000 TRUE NA TRUE 2 2017-01-09 10:56:58
10 1 FLOCK 75-011 1 AB001 Coliterra 100 100 G 15000 TRUE NA TRUE 2 2017-01-09 09:59:28
# … with 1,798 more rows
$DISEASE
# A tibble: 322 x 9
EVENT USUBJID DISEASE_SEQ STUDYID SITEID FARMCODE FLOCKSEQUENCE entry enteredtime
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> <dttm>
1 FLOCK 75-001 1 18ZN 75 001 01 2 2017-01-31 11:16:13
2 FLOCK 75-002 1 18ZN 75 002 01 2 2017-02-24 15:45:23
3 FLOCK 75-003 1 18ZN 75 003 01 2 2017-02-01 12:18:11
4 FLOCK 75-004 1 18ZN 75 004 01 2 2017-01-31 15:52:40
5 FLOCK 75-005 1 18ZN 75 005 01 2 2017-01-09 09:20:57
6 FLOCK 75-006 1 18ZN 75 006 01 2 2017-02-01 14:03:44
7 FLOCK 75-007 1 18ZN 75 007 01 2 2017-01-26 20:13:38
8 FLOCK 75-008 1 18ZN 75 008 01 2 2017-01-31 16:55:40
9 FLOCK 75-009 1 18ZN 75 009 01 2 2017-01-04 15:46:23
10 FLOCK 75-010 1 18ZN 75 010 01 2 2017-01-09 10:54:05
# … with 312 more rows
$DISEASE_GridEvent
# A tibble: 5,565 x 17
DISEASE_SEQ EVENT USUBJID DISEASE_GridEvent_SEQ WEEK RESPIRATORY DIARRHOEA CNS MALAISE LEGLESIONS SUDDENDEATH OTHDISEASE CHICKENSICKNO CHICKENDISEASEDEATH CHICKENSUDDENDEATH entry enteredtime
<dbl> <chr> <chr> <dbl> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dttm>
1 1 FLOCK 75-001 1 1 FALSE FALSE FALSE FALSE FALSE TRUE <NA> NA NA 3 2 2017-01-31 11:16:13
2 1 FLOCK 75-002 1 1 FALSE FALSE FALSE TRUE FALSE FALSE <NA> 5 5 NA 2 2017-02-24 15:45:23
3 1 FLOCK 75-003 1 1 FALSE FALSE FALSE FALSE FALSE FALSE CHUOT CAN NA 19 NA 2 2017-02-01 12:18:12
4 1 FLOCK 75-004 1 1 FALSE FALSE FALSE FALSE FALSE FALSE YEU CHET NA 2 NA 2 2017-01-31 15:52:40
5 1 FLOCK 75-005 1 1 FALSE TRUE FALSE TRUE FALSE FALSE <NA> 28 8 NA 2 2017-01-09 09:20:58
6 1 FLOCK 75-006 1 1 FALSE FALSE FALSE FALSE FALSE FALSE 1 CON BI DE CHET 2 3 NA 2 2017-02-01 14:03:45
7 1 FLOCK 75-007 1 1 FALSE FALSE FALSE FALSE FALSE FALSE Ga khuyet tat NA 4 NA 2 2017-01-26 20:13:38
8 1 FLOCK 75-008 1 1 FALSE FALSE FALSE TRUE FALSE FALSE 06 CON CHET KHONG RO NGUYEN NHAN 4 4 6 2 2017-01-31 16:55:40
9 1 FLOCK 75-009 1 1 FALSE FALSE FALSE FALSE FALSE FALSE <NA> NA NA NA 2 2017-01-04 15:46:23
10 1 FLOCK 75-010 1 1 FALSE FALSE FALSE FALSE FALSE FALSE <NA> NA NA NA 2 2017-01-09 10:54:05
# … with 5,555 more rows
$MID_INOUT
# A tibble: 322 x 9
EVENT USUBJID MID_INOUT_SEQ STUDYID SITEID FARMCODE FLOCKSEQUENCE entry enteredtime
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> <dttm>
1 FLOCK 75-001 1 18ZN 75 001 01 2 2017-01-31 11:00:56
2 FLOCK 75-002 1 18ZN 75 002 01 2 2017-02-01 14:25:02
3 FLOCK 75-003 1 18ZN 75 003 01 2 2017-02-01 12:04:35
4 FLOCK 75-004 1 18ZN 75 004 01 2 2017-01-31 15:36:13
5 FLOCK 75-005 1 18ZN 75 005 01 2 2017-01-09 09:05:19
6 FLOCK 75-006 1 18ZN 75 006 01 2 2017-02-01 13:52:37
7 FLOCK 75-007 1 18ZN 75 007 01 2 2017-01-26 20:00:20
8 FLOCK 75-008 1 18ZN 75 008 01 2 2017-01-31 16:41:08
9 FLOCK 75-009 1 18ZN 75 009 01 2 2017-01-04 15:29:06
10 FLOCK 75-010 1 18ZN 75 010 01 2 2017-01-09 10:36:04
# … with 312 more rows
$MID_INOUT_GridEvent
# A tibble: 5,566 x 19
MID_INOUT_SEQ EVENT USUBJID MID_INOUT_GridEv… WEEK CHICKENBOUGHT BUYINGCOST OTHCHICKEN OTHCHICKENSPEC ADDINGCHICKENTOT… CHICKENSELLINGT… SELLINGTOTALKGS CHICKENSELLINGV… OTHCHICKENEX EXREASON CHICKENEXTOTAL CHICKENTOTAL entry enteredtime
<dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <lgl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dttm>
1 1 FLOCK 75-001 1 1 NA NA NA <NA> NA NA NA NA 3 CHET DOT… 3 207 2 2017-01-31 11:00:56
2 1 FLOCK 75-002 1 1 NA NA NA <NA> NA NA NA NA 5 CHET BENH 5 500 2 2017-02-01 14:25:02
3 1 FLOCK 75-003 1 1 NA NA NA <NA> 0 NA NA NA 25 CHUOT CAN 25 381 2 2018-10-29 09:49:51
4 1 FLOCK 75-004 1 1 NA NA NA <NA> NA NA NA NA 2 YEU CHET 2 713 2 2017-01-31 15:36:13
5 1 FLOCK 75-005 1 1 8 96000 NA <NA> 8 NA NA NA 8 chết bệnh 8 102 2 2018-10-25 09:33:31
6 1 FLOCK 75-006 1 1 NA NA NA <NA> NA NA NA NA 5 3 CO CHE… 5 97 2 2018-10-25 09:41:55
7 1 FLOCK 75-007 1 1 NA NA NA <NA> 0 NA NA NA 4 yeu, khu… 4 98 2 2017-01-26 20:00:20
8 1 FLOCK 75-008 1 1 NA NA NA <NA> NA NA NA NA 10 06 CON C… 10 590 2 2017-01-31 16:41:08
9 1 FLOCK 75-009 1 1 NA NA NA <NA> NA NA NA NA NA <NA> NA 153 2 2018-08-26 09:54:30
10 1 FLOCK 75-010 1 1 NA NA NA <NA> NA NA NA NA NA 0 NA 410 2 2018-08-26 09:55:01
# … with 5,556 more rows
$SAMPLE
# A tibble: 961 x 19
EVENT USUBJID SAMPLE_SEQ STUDYID SITEID FARMCODE FLOCKSEQUENCE SAMPLINGDATE SAMPLINGVISIT BOOTSWAB CLOACALSWAB LIVECHICKEN ESAMPLE ESAMPLESPEC FLOCKPIC FLOCKPIC1 DEVICEINDEX1 entry enteredtime
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dttm> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <chr> <chr> <dbl> <dttm>
1 Sample 75-001 1 18ZN 75 001 01 2016-10-24 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 001-S-01 101-0013 2 2016-10-27 15:38:03
2 Sample 75-002 1 18ZN 75 002 01 2016-10-24 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 002-S-01 101-0018 2 2016-12-02 08:22:22
3 Sample 75-003 1 18ZN 75 003 01 2016-10-28 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 003-S-01 101-0024 2 2016-11-07 16:18:17
4 Sample 75-004 1 18ZN 75 004 01 2016-10-28 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 004-S-01 101-0029 2 2016-11-07 16:19:04
5 Sample 75-005 1 18ZN 75 005 01 2016-11-09 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 005-S-01 103-0267 2 2016-12-02 08:39:51
6 Sample 75-006 1 18ZN 75 006 01 2016-11-09 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 006-S-01 103-0289 2 2016-12-02 08:47:52
7 Sample 75-007 1 18ZN 75 007 01 2016-11-09 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 007-S-01 103-0302 2 2016-12-02 08:59:47
8 Sample 75-008 1 18ZN 75 008 01 2016-11-09 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 008-S-01 103-0326 2 2016-12-02 09:15:08
9 Sample 75-009 1 18ZN 75 009 01 2016-11-09 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 009-S-01 102-0037 2 2016-12-02 09:21:22
10 Sample 75-010 1 18ZN 75 010 01 2016-11-16 00:00:00 S TRUE FALSE FALSE FALSE NA TRUE 010-S-01 102-0042 2 2016-12-02 09:27:39
# … with 951 more rows
Retrieving the sampling dates. One key feature of the code below is to convert a sampling date into a week index. SAMPLINGVISIT
tells whether the visit is at the start (S
), the middle (M
) or the end (E
) of the flock.
> (samples <- surveillance$SAMPLE %>%
+ # End sampling of flock 2 of farm 75-013 is duplicated, so we filter it out: CORRECTION
+ dplyr::filter(!(USUBJID == "75-013" & FLOCKSEQUENCE == "02" & SAMPLE_SEQ == 8)) %>%
+ dplyr::select(USUBJID, FLOCKSEQUENCE, SAMPLINGDATE, SAMPLINGVISIT) %>%
+ assign("tmp", ., 1) %>% # for reuse 4 lines later
+ dplyr::filter(SAMPLINGVISIT == "S") %>% # | gets the date...
+ dplyr::select(-SAMPLINGVISIT) %>% # | ... of the first week...
+ dplyr::rename(start = SAMPLINGDATE) %>% # | ... of each flock.
+ dplyr::right_join(tmp, c("USUBJID", "FLOCKSEQUENCE")) %>%
+ dplyr::mutate(WEEK = as.numeric(floor((SAMPLINGDATE - start) / (60 * 60 * 24 * 7)) + 1),
+ sampling = TRUE) %>%
+ dplyr::select(-start, -SAMPLINGDATE, -SAMPLINGVISIT) %>%
+ unique()) # dates of first visits are missing for flock 2 of farm 75-019, THIS NEED TO BE FIXED!!!!!
# A tibble: 957 x 4
USUBJID FLOCKSEQUENCE WEEK sampling
<chr> <chr> <dbl> <lgl>
1 75-001 01 1 TRUE
2 75-002 01 1 TRUE
3 75-003 01 1 TRUE
4 75-004 01 1 TRUE
5 75-005 01 1 TRUE
6 75-006 01 1 TRUE
7 75-007 01 1 TRUE
8 75-008 01 1 TRUE
9 75-009 01 1 TRUE
10 75-010 01 1 TRUE
# … with 947 more rows
> # flock 3 of farm 75-029 and flock 2 of farm 75-058. CORRECTION
This data frame contains all the dates of sampling, one per row.
Retrieving the clinical signs data. Note that there are 2 corrections that we need to do here.
> (clinical_signs <- surveillance[disease] %>%
+ unname() %>% # because of the do.call() call that follows
+ c(list(c("USUBJID", "DISEASE_SEQ"))) %>%
+ do.call(dplyr::left_join, .) %>%
+ dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK, RESPIRATORY, DIARRHOEA, CNS, MALAISE,
+ LEGLESIONS, SUDDENDEATH, DISEASE_GridEvent_SEQ, CHICKENDISEASEDEATH, CHICKENSUDDENDEATH) %>%
+ dplyr::mutate(CHICKENDISEASEDEATH = tidyr::replace_na(CHICKENDISEASEDEATH, 0),
+ CHICKENSUDDENDEATH = tidyr::replace_na(CHICKENSUDDENDEATH, 0)) %>%
+ assign("tmp", ., 1) %>% # for reuse 3 lines later
+ # CORRECTION 1: weeks 8 instead of 9 for one of the two records of cycle 2 of farm 8:
+ dplyr::filter(USUBJID == "75-008", FLOCKSEQUENCE == "02", DISEASE_GridEvent_SEQ == 9) %>%
+ dplyr::mutate(WEEK = 9) %>%
+ dplyr::bind_rows(dplyr::filter(tmp, !(USUBJID == "75-008" & FLOCKSEQUENCE == "02" & DISEASE_GridEvent_SEQ == 9))) %>%
+ assign("tmp", ., 1) %>% # for reuse 3 lines later
+ # CORRECTION 2: week 19 instead of 10 for cycle 6 of farm 21:
+ dplyr::filter(USUBJID == "75-021", FLOCKSEQUENCE == "06", DISEASE_GridEvent_SEQ == 10) %>%
+ dplyr::mutate(WEEK = 10) %>%
+ dplyr::bind_rows(dplyr::filter(tmp, !(USUBJID == "75-021" & FLOCKSEQUENCE == "06" & DISEASE_GridEvent_SEQ == 10))) %>%
+ dplyr::select(-DISEASE_GridEvent_SEQ))
# A tibble: 5,565 x 11
USUBJID FLOCKSEQUENCE WEEK RESPIRATORY DIARRHOEA CNS MALAISE LEGLESIONS SUDDENDEATH CHICKENDISEASEDEATH CHICKENSUDDENDEATH
<chr> <chr> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl> <dbl>
1 75-021 06 10 FALSE FALSE FALSE FALSE FALSE FALSE 0 0
2 75-008 02 9 FALSE FALSE FALSE FALSE FALSE FALSE 0 0
3 75-001 01 1 FALSE FALSE FALSE FALSE FALSE TRUE 0 3
4 75-001 01 2 FALSE FALSE FALSE FALSE FALSE FALSE 0 0
5 75-001 01 3 FALSE FALSE FALSE FALSE FALSE TRUE 0 1
6 75-001 01 4 FALSE FALSE FALSE FALSE FALSE FALSE 0 0
7 75-001 01 5 FALSE FALSE FALSE FALSE FALSE FALSE 0 0
8 75-001 01 6 FALSE FALSE FALSE FALSE FALSE TRUE 0 2
9 75-001 01 7 FALSE FALSE FALSE FALSE FALSE FALSE 0 0
10 75-001 01 8 FALSE FALSE FALSE FALSE FALSE TRUE 0 2
# … with 5,555 more rows
This data frame contains all the weeks of all the flocks of all the farms. There is no missing weeks:
> missing_weeks(clinical_signs)
There is no missing week.
Here we’ll merge with the drug_codes
dataframe that we generated above.
> (amu <- surveillance[antimicrobial] %>%
+ unname() %>% # because of the do.call() call that follows
+ c(list(c("USUBJID", "ANTIMICROBIAL_SEQ"))) %>%
+ do.call(dplyr::left_join, .) %>%
+ dplyr::select(USUBJID, FLOCKSEQUENCE, WEEKNO, CODE, AMOUTUSED, PACKAGEUNIT) %>%
+ dplyr::rename(WEEK = WEEKNO) %>%
+ dplyr::mutate(PACKAGEUNIT = dplyr::na_if(PACKAGEUNIT, "TAB")) %>%
+ dplyr::left_join(drug_codes, "CODE") %>%
+ tidyr::replace_na(list(ANTINAME1 = "unknown")) %>% # because some antibiotic names are unknown
+ dplyr::mutate(antibiotic = proportion * AMOUTUSED,
+ ANTINAME1 = paste0(ANTINAME1, "_g")) %>%
+ dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK, ANTINAME1, antibiotic) %>%
+ tibble::rowid_to_column() %>% # spread() requires unique row identifiers
+ tidyr::spread(ANTINAME1, antibiotic) %>%
+ dplyr::select(-rowid) %>% # we don't need rowid anymore
+ replace(is.na(.), 0) %>%
+ dplyr::group_by(USUBJID, FLOCKSEQUENCE, WEEK) %>%
+ dplyr::summarise_all(~sum(.)) %>%
+ dplyr::ungroup() %>%
+ dplyr::mutate_at(dplyr::vars(dplyr::ends_with("_g")), . %>% ifelse(is.na(.), 0, .)))
# A tibble: 5,566 x 48
USUBJID FLOCKSEQUENCE WEEK amoxicillin_g ampicillin_g apramycin_g cefadroxil_g cefotaxime_g ceftiofur_g cephalexin_g chloramphenicol… ciprofloxacin_g colistin_g doxycycline_g enramycin_g enrofloxacin_g erythromycin_g florfenicol_g flumequine_g
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 75-001 01 1 0 0 0 0 0 0 0 0 0 0.0735 0 0 0 0 0 0
2 75-001 01 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 75-001 01 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 75-001 01 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 75-001 01 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 75-001 01 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 75-001 01 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 75-001 01 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9 75-001 01 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
10 75-001 01 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# … with 5,556 more rows, and 29 more variables: gentamicin_g <dbl>, josamycin_g <dbl>, kitasamycin_g <dbl>, lincomycin_g <dbl>, marbofloxacin_g <dbl>, methenamine_g <dbl>, neomycin_g <dbl>, norfloxacin_g <dbl>, oxytetracycline_g <dbl>,
# spectinomycin_g <dbl>, spiramycin_g <dbl>, streptomycin_g <dbl>, sulfachloropyridazine_g <dbl>, sulfadiazine_g <dbl>, sulfadimethoxine_g <dbl>, sulfadimidine_g <dbl>, sulfaguanidin_g <dbl>, sulfamethazine_g <dbl>, sulfamethoxazole_g <dbl>,
# sulfamethoxypyridazine_g <dbl>, sulphamethoxazole_g <dbl>, sulphathiazole_g <dbl>, tetracycline_g <dbl>, thiamphenicol_g <dbl>, tiamulin_g <dbl>, tilmicosin_g <dbl>, trimethoprim_g <dbl>, tylosin_g <dbl>, unknown_g <dbl>
No missing weeks:
> missing_weeks(amu)
There is no missing week.
Below is the code to retrieve data on chicken demographics. In there, we are computing a variable indicating whether the cycle is finisheed.
> (chicken <- surveillance[chicks] %>%
+ unname() %>% # because of the do.call() call that follows
+ c(list(c("USUBJID", "MID_INOUT_SEQ"))) %>%
+ do.call(dplyr::left_join, .) %>%
+ dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK, CHICKENTOTAL, CHICKENEXTOTAL, MID_INOUT_GridEvent_SEQ) %>%
+ # CORRECTION:
+ assign("tmp", ., 1) %>% # for reuse 3 lines below
+ dplyr::filter(USUBJID == "75-070", FLOCKSEQUENCE == "08", MID_INOUT_GridEvent_SEQ == 6) %>%
+ dplyr::mutate(WEEK = 6) %>%
+ dplyr::bind_rows(dplyr::filter(tmp, !(USUBJID == "75-070" & FLOCKSEQUENCE == "08" & MID_INOUT_GridEvent_SEQ == 6))) %>%
+ dplyr::select(-MID_INOUT_GridEvent_SEQ) %>%
+ # (end of correction)
+ dplyr::mutate_at(c("CHICKENTOTAL", "CHICKENEXTOTAL"), as.integer) %>%
+ dplyr::mutate_at("CHICKENEXTOTAL", tidyr::replace_na, 0) %>%
+ assign("tmp", ., 1) %>% # for reuse on the final line
+ dplyr::group_by(USUBJID, FLOCKSEQUENCE) %>% # | computing whether...
+ dplyr::summarize(completed = min(CHICKENTOTAL) < 1) %>% # | ... the cycle is...
+ dplyr::ungroup() %>% # | ... finished.
+ dplyr::right_join(tmp, c("USUBJID", "FLOCKSEQUENCE")))
# A tibble: 5,566 x 6
USUBJID FLOCKSEQUENCE completed WEEK CHICKENTOTAL CHICKENEXTOTAL
<chr> <chr> <lgl> <dbl> <int> <dbl>
1 75-070 08 TRUE 6 498 0
2 75-001 01 TRUE 1 207 3
3 75-001 01 TRUE 2 207 0
4 75-001 01 TRUE 3 206 1
5 75-001 01 TRUE 4 206 0
6 75-001 01 TRUE 5 206 0
7 75-001 01 TRUE 6 204 2
8 75-001 01 TRUE 7 204 0
9 75-001 01 TRUE 8 202 2
10 75-001 01 TRUE 9 202 0
# … with 5,556 more rows
No missing week:
> missing_weeks(chicken)
There is no missing week.
Here, we merge the 4 data frames that were computed above: samples
, clinical_signs
, amu
and chicken
. First, we need to check that there is no incompatibility between the amu
, clinical_signs
and chicken
data frames:
> dplyr::anti_join(amu, clinical_signs, c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>% dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)
# A tibble: 1 x 3
USUBJID FLOCKSEQUENCE WEEK
<chr> <chr> <dbl>
1 75-004 05 17
> dplyr::anti_join(clinical_signs, amu, c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>% dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)
# A tibble: 0 x 3
# … with 3 variables: USUBJID <chr>, FLOCKSEQUENCE <chr>, WEEK <dbl>
> dplyr::anti_join(chicken, clinical_signs, c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>% dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)
# A tibble: 1 x 3
USUBJID FLOCKSEQUENCE WEEK
<chr> <chr> <dbl>
1 75-077 06 18
> dplyr::anti_join(clinical_signs, chicken, c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>% dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)
# A tibble: 0 x 3
# … with 3 variables: USUBJID <chr>, FLOCKSEQUENCE <chr>, WEEK <dbl>
> dplyr::anti_join(amu, chicken, c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>% dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)
# A tibble: 1 x 3
USUBJID FLOCKSEQUENCE WEEK
<chr> <chr> <dbl>
1 75-004 05 17
> dplyr::anti_join(chicken, amu, c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>% dplyr::select(USUBJID, FLOCKSEQUENCE, WEEK)
# A tibble: 1 x 3
USUBJID FLOCKSEQUENCE WEEK
<chr> <chr> <dbl>
1 75-077 06 18
The week 17
of flock 05
of the farm 75-004
should be removed from the amu
data set:
> amu %>%
+ dplyr::filter(USUBJID == "75-004", FLOCKSEQUENCE == "05", WEEK == 17) %>%
+ dplyr::select(-USUBJID, -FLOCKSEQUENCE, -WEEK) %>%
+ unlist() %>%
+ unique()
[1] 0
Let’s remove it:
> amu %<>% dplyr::filter(!(USUBJID == "75-004" & FLOCKSEQUENCE == "05" & WEEK == 17))
Furthermore, is seems that there is no data for the week 18
of the flock 06
of the farm 75-077
in the amu
and clinical_signs
data frame.
> nrow(amu)
[1] 5565
> nrow(chicken)
[1] 5566
> nrow(clinical_signs)
[1] 5565
We are ready for the merging and writing to disk:
> if (!dir.exists("data")) dir.create("data")
> list(chicken, clinical_signs, amu, samples) %>% # chicken needs to be first because it's the only df with no NA
+ purrr::reduce(dplyr::left_join, by = c("USUBJID", "FLOCKSEQUENCE", "WEEK")) %>%
+ dplyr::mutate(sampling = ! is.na(sampling)) %>% # because in samples there are only sampled weeks
+ dplyr::mutate_at(dplyr::vars(FLOCKSEQUENCE, WEEK), as.integer) %>%
+ dplyr::arrange(USUBJID, FLOCKSEQUENCE, WEEK) %>%
+ dplyr::rename(farm = USUBJID,
+ flock = FLOCKSEQUENCE,
+ week = WEEK,
+ respiratory = RESPIRATORY,
+ diarrhoea = DIARRHOEA,
+ cns = CNS,
+ malaise = MALAISE,
+ leg_lesions = LEGLESIONS,
+ sudden_death = SUDDENDEATH,
+ nb_chicken = CHICKENTOTAL,
+ chicken_disease_death = CHICKENDISEASEDEATH,
+ chicken_sudden_death = CHICKENSUDDENDEATH,
+ nb_chicken_sold = CHICKENEXTOTAL) %>%
+ dplyr::select(farm, flock, week, sampling, completed, nb_chicken, nb_chicken_sold, chicken_disease_death, chicken_sudden_death, dplyr::everything()) %>%
+ write.csv("data/viparc.csv", FALSE, row.names = FALSE)
Note that, among the clinical_signs
, amu
, chicken
and samples
data frames, samples
is the only one that contains rows only for the weeks where samples are taken. The three other data frames contain rows for all the weeks of all the flocks of all the farms. For this reason, after the merging, we have to replace the missing values in the sampling
variable by FALSE
.