COVID-19 Number of Tests in Italy

Table of Contents

Menu

Introduction

This page presents some data about the number of tests and people tested for COVID-19 over time in Italy and compares them with the number of people found positive.

This page was created on <2020-08-20 Thu> and last updated on <2020-09-15 Tue>.

The source code available on the COVID-19 pages is distributed under the MIT License; the content is distributed under a Creative Commons Attribution 4.0 license.

Getting data into R

We first read the data from the Civil Protection repository and add the ratio between new positives and people tested, computed in two ways: against the people tested on the same day, and against the people tested two days earlier (on the assumption that tests take two days to complete).

In fact, data about tests is used with different semantics by different regions. Some regions report tests with results (in which case the ratio new positives / tests makes sense). Others report the number of tests performed, in which case the correct ratio is between positives and the tests performed some days earlier. We assume two days and report both ratios for all regions. See the following issue on GitHub for an explanation and some more details: https://github.com/pcm-dpc/COVID-19/issues/577 (in Italian).

# directory containing the data files read by this page
PATH <- "/home/adolfo/Downloads/COVID-19/"
# decimal digits kept when rounding a ratio, before it is turned into a percentage
DIGITS <- 4

# evolution over time at the National level
national <- read.csv(
  file.path(PATH, "dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv")
)
national[["data"]] <- as.Date(national[["data"]])

# people tested on each day: day-over-day difference of the cumulative count
# (the first row has no previous day, hence the leading NA)
national[["nuovi_casi_testati"]] <- c(NA, diff(national[["casi_testati"]]))
# positives over people tested on the same day, as a percentage
national[["p_over_t"]] <- with(
  national,
  round(nuovi_positivi / nuovi_casi_testati, digits = DIGITS) * 100
)

# the daily series moved forward by two rows: each day is paired with the
# people tested two days earlier
national[["nuovi_casi_testati_2"]] <- c(NA, NA, head(national[["nuovi_casi_testati"]], -2))
# positives over people tested two days earlier, as a percentage
national[["p_over_t_2"]] <- with(
  national,
  round(nuovi_positivi / nuovi_casi_testati_2, digits = DIGITS) * 100
)

At the regional level, computed columns, such as the number of people tested in a day, have to be computed after filtering by region; otherwise diff would work on values belonging to different regions.

# evolution over time, by Region
# evolution over time, by Region (all regions in a single data frame)
data <- read.csv(
  file.path(PATH, "dati-regioni/dpc-covid19-ita-regioni.csv")
)
# turn the "data" (date) column into Date objects
data[["data"]] <- as.Date(data[["data"]])

These are the columns we are interested in and their translation in English:

# Columns shown in the tables, in display order (Italian name: English meaning)
cols <- c(
  "data",                # date
  "casi_testati",        # cases tested (cumulative count)
  "totale_positivi",     # total positives
  "nuovi_casi_testati",  # new cases tested (daily difference of casi_testati)
  "nuovi_positivi",      # new positives
  "p_over_t",            # new positives / new cases tested, in percent
  "p_over_t_2"           # new positives / new cases tested two days earlier, in percent
)

We now define a function to output the last N rows of the input data frame. The real “challenge”, here, is transposing the data, to get a more natural presentation (with time progressing from left to right).

# Build a transposed view of the most recent rows of a data frame: one row per
# selected column and one column per date, so that time runs left to right.
#
# Arguments:
#   df    data frame with a Date column named "data"
#   cols  names of the columns to show; must include "data", which is used
#         only to build the column headers (it can appear at any position)
#   rows  number of trailing rows of df to include (default 10)
#
# Returns a data frame whose first column, "Label", holds the original column
# names and whose remaining columns are named after the formatted dates.
# Values are converted to text by the transposition.
table_data <- function(df, cols, rows = 10) {
  stopifnot(is.data.frame(df))
  stopifnot("data" %in% cols)
  stopifnot(all(cols %in% colnames(df)))

  # keep the last `rows` rows and only the requested columns
  rf <- tail(df, rows)[, cols, drop = FALSE]

  # the labels in the transposed matrix are the column names of the original data.frame
  row_labels <- colnames(rf)
  # the columns in the transposed matrix are the dates
  col_labels <- c("Label", format(rf$data, "%a, %b %d"))

  rft <- data.frame(row_labels, t(rf))
  colnames(rft) <- col_labels

  # the dates already appear in the header: drop their row by name, not by
  # position, so the result is right wherever "data" sits in `cols`
  rft[row_labels != "data", , drop = FALSE]
}

People Tested and Cases in Italy

Data of the last ten days

# last ten days of the national series, one column per day
table_data(df = national, cols = cols)
Label Fri, Sep 18 Sat, Sep 19 Sun, Sep 20 Mon, Sep 21 Tue, Sep 22 Wed, Sep 23 Thu, Sep 24 Fri, Sep 25 Sat, Sep 26 Sun, Sep 27
casi_testati 6187258 6248028 6302761 6342654 6391999 6455762 6520661 6584670 6646695 6700432
totale_positivi 42457 43161 44098 45079 45489 46114 46780 47718 48593 49618
nuovi_casi_testati 60015 60770 54733 39893 49345 63763 64899 64009 62025 53737
nuovi_positivi 1907 1638 1587 1350 1392 1640 1786 1912 1869 1766
p_over_t 3.18 2.7 2.9 3.38 2.82 2.57 2.75 2.99 3.01 3.29
p_over_t_2 3.28 2.62 2.64 2.22 2.54 4.11 3.62 3.0 2.88 2.76

Number of Tests

# Daily number of people tested at the national level, from August 1st on.
# The filter is applied once, and the cut-off is an explicit Date instead of a
# string that is converted implicitly at every comparison.
tests_since <- as.Date("2020-08-01")
recent_tests <- national[national$data >= tests_since, c("data", "nuovi_casi_testati")]

# explicit axis titles: the defaults would be the raw subsetting expressions
plot(x = recent_tests$data,
     y = recent_tests$nuovi_casi_testati,
     type = "l", lwd = 6, col = "#3B3176",
     xlab = "Day", ylab = "Number of Tests")
# print each value just above its point on the line
text(x = recent_tests$data,
     y = recent_tests$nuovi_casi_testati,
     labels = recent_tests$nuovi_casi_testati,
     pos = 3, cex = 1.5, col = "#3B3176")
grid(col = "black")

tests_italia.png

Number of Tests and New Cases

Plot new cases and tests together. (Solution taken from How can I plot with 2 different y-axes? on Stack Overflow.)

## Daily people tested (left axis) and new positives (right axis) on one
## graph, from August 1st on. Each series has its own y scale.
plot_since <- as.Date("2020-08-01")
recent <- national[national$data >= plot_since, ]
dates <- recent$data
tests_col <- "#3B3176"
cases_col <- "#AA0000"
line_width <- 6

## add extra space to the left and right margins, to make room for the axis
## titles written at line 4 on both sides; the previous margins are saved so
## that they can be restored once the graph is complete
old_par <- par(mar = c(5, 6, 4, 6) + 0.1)

## Plot first set of data and draw its axis
tests_limits <- range(recent$nuovi_casi_testati)
plot(x = dates,
     y = recent$nuovi_casi_testati,
     type = "l", lwd = line_width, col = tests_col,
     axes = FALSE,
     ylim = tests_limits,
     ylab = "", xlab = "")
text(x = dates,
     y = recent$nuovi_casi_testati,
     labels = recent$nuovi_casi_testati,
     pos = 3, cex = 1, col = tests_col)
mtext("Number of Tests", side = 2, col = tests_col, line = 4)
## the axis takes its scale from the plot just drawn (ylim is not an axis argument)
axis(2, col = "black", las = 1)
box()

## Allow a second plot on the same graph
par(new = TRUE)
new_cases_limits <- range(recent$nuovi_positivi)

plot(x = dates,
     y = recent$nuovi_positivi,
     type = "l", lwd = line_width, col = cases_col,
     axes = FALSE,
     ylim = new_cases_limits,
     ylab = "", xlab = "")
text(x = dates,
     y = recent$nuovi_positivi,
     labels = recent$nuovi_positivi,
     pos = 1, cex = 1, col = cases_col)
mtext("New Cases", side = 4, line = 4, col = cases_col)
axis(4, las = 1)

## grid lines at the default tick positions of the current (new cases) plot
grid(nx = NULL, ny = NULL, col = "black", lty = "dotted")

# x-axis: one tick per week
axis.Date(1, at = seq(min(dates), max(dates), by = "week"), format = "%b %d", las = 2)
mtext("Day", side = 1, line = 2.5)

## Add Legend (line samples, since both series are drawn as lines)
legend("topleft", legend = c("Tests", "New Cases"),
       text.col = c(tests_col, cases_col),
       col = c(tests_col, cases_col),
       lty = 1, lwd = line_width)

## restore the margins in use before this graph
par(old_par)

tests_and_new_cases_italia.png

People Tested and Cases in Trentino

# Daily figures are derived after filtering, so that diff only sees the rows
# of a single region.
region <- subset(data, denominazione_regione == "P.A. Trento")

# people tested on each day: day-over-day difference of the cumulative count
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, lag = 1))
# positives over people tested on the same day, as a percentage
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100

# people tested two days earlier (the daily series moved forward by two rows).
# An earlier variant based on diff(lag = 2) was computed here and immediately
# overwritten; it has been removed.
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
# positives over people tested two days earlier, as a percentage
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Fri, Sep 18 Sat, Sep 19 Sun, Sep 20 Mon, Sep 21 Tue, Sep 22 Wed, Sep 23 Thu, Sep 24 Fri, Sep 25 Sat, Sep 26 Sun, Sep 27
casi_testati 91799 92546 93131 93200 93785 94533 94954 95829 96818 97509
totale_positivi 558 534 557 558 548 560 549 548 563 598
nuovi_casi_testati 466 747 585 69 585 748 421 875 989 691
nuovi_positivi 18 22 45 2 25 22 23 23 51 42
p_over_t 3.86 2.95 7.69 2.9 4.27 2.94 5.46 2.63 5.16 6.08
p_over_t_2 2.22 3.04 9.66 0.27 4.27 31.88 3.93 3.07 12.11 4.8

People Tested and Cases in Liguria

# Daily figures are derived after filtering, so that diff only sees the rows
# of a single region.
region <- subset(data, denominazione_regione == "Liguria")

# people tested on each day: day-over-day difference of the cumulative count
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, lag = 1))
# positives over people tested on the same day, as a percentage
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100

# people tested two days earlier (the daily series moved forward by two rows).
# An earlier variant based on diff(lag = 2) was computed here and immediately
# overwritten; it has been removed.
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
# positives over people tested two days earlier, as a percentage
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Fri, Sep 18 Sat, Sep 19 Sun, Sep 20 Mon, Sep 21 Tue, Sep 22 Wed, Sep 23 Thu, Sep 24 Fri, Sep 25 Sat, Sep 26 Sun, Sep 27
casi_testati 150104 151550 152634 153324 154818 156756 158350 159717 161282 162174
totale_positivi 1414 1433 1461 1525 1530 1593 1654 1726 1737 1738
nuovi_casi_testati 1912 1446 1084 690 1494 1938 1594 1367 1565 892
nuovi_positivi 158 78 85 64 105 108 102 73 97 45
p_over_t 8.26 5.39 7.84 9.28 7.03 5.57 6.4 5.34 6.2 5.04
p_over_t_2 10.06 6.81 4.45 4.43 9.69 15.65 6.83 3.77 6.09 3.29

People Tested and Cases in Veneto

# Daily figures are derived after filtering, so that diff only sees the rows
# of a single region.
region <- subset(data, denominazione_regione == "Veneto")

# people tested on each day: day-over-day difference of the cumulative count
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, lag = 1))
# positives over people tested on the same day, as a percentage
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100

# people tested two days earlier (the daily series moved forward by two rows).
# An earlier variant based on diff(lag = 2) was computed here and immediately
# overwritten; it has been removed.
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
# positives over people tested two days earlier, as a percentage
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Fri, Sep 18 Sat, Sep 19 Sun, Sep 20 Mon, Sep 21 Tue, Sep 22 Wed, Sep 23 Thu, Sep 24 Fri, Sep 25 Sat, Sep 26 Sun, Sep 27
casi_testati 691678 697466 701455 703443 707232 711874 717052 721936 727456 731147
totale_positivi 3009 3045 3093 3134 3143 3155 3272 3336 3432 3490
nuovi_casi_testati 4314 5788 3989 1988 3789 4642 5178 4884 5520 3691
nuovi_positivi 176 186 173 103 119 150 248 196 216 159
p_over_t 4.08 3.21 4.34 5.18 3.14 3.23 4.79 4.01 3.91 4.31
p_over_t_2 2.96 4.48 4.01 1.78 2.98 7.55 6.55 4.22 4.17 3.26

People Tested and Cases in Lombardia

# Daily figures are derived after filtering, so that diff only sees the rows
# of a single region.
region <- subset(data, denominazione_regione == "Lombardia")

# people tested on each day: day-over-day difference of the cumulative count
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, lag = 1))
# positives over people tested on the same day, as a percentage
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100

# people tested two days earlier (the daily series moved forward by two rows).
# An earlier variant based on diff(lag = 2) was computed here and immediately
# overwritten; it has been removed.
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
# positives over people tested two days earlier, as a percentage
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Fri, Sep 18 Sat, Sep 19 Sun, Sep 20 Mon, Sep 21 Tue, Sep 22 Wed, Sep 23 Thu, Sep 24 Fri, Sep 25 Sat, Sep 26 Sun, Sep 27
casi_testati 1184027 1197038 1206551 1213246 1225067 1239028 1252897 1265754 1278033 1288690
totale_positivi 9027 8935 9007 9076 9027 9104 9048 9171 9087 9237
nuovi_casi_testati 10246 13011 9513 6695 11821 13961 13869 12857 12279 10657
nuovi_positivi 224 243 211 90 182 196 229 277 256 216
p_over_t 2.19 1.87 2.22 1.34 1.54 1.4 1.65 2.15 2.08 2.03
p_over_t_2 1.88 1.81 2.06 0.69 1.91 2.93 1.94 1.98 1.85 1.68

Author: Adolfo Villafiorita

Last modified: 2020-09-15 Tue 17:57 (created on: 2020-08-20 Thu 00:00)

Published: 2020-09-27 Sun 18:08