regression.ts 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. import { compose } from '@typed/compose';
  2. import groupBy from 'lodash/groupBy';
  3. import flatten from 'lodash/flatten';
  4. import differenceInDays from 'date-fns/differenceInDays';
  5. import addDays from 'date-fns/addDays';
  6. import { Cases, Data, DataPoint, CountryCase, CountryCases, CountryDataPoint } from '../types';
  7. // Start the regression line after this many cases have been recorded in total
  8. const regressionStart = 50;
  9. // Number of days to extrapolate the regression fit on the graph
  10. const futureDays = 3;
  11. const mean = (values: number[]): number =>
  12. values.reduce((last, value) => last + value, 0) / values.length;
  13. type WithNumericDate = Omit<CountryDataPoint, 'regression'>[];
  14. const withNumericDate = (cases: Cases): WithNumericDate =>
  15. cases.map(({ date, value }) => ({
  16. date,
  17. xValue: date.getTime(),
  18. value,
  19. }));
  20. type WithCumulative = (Omit<CountryDataPoint, 'regression'> & {
  21. valueCumulative: number;
  22. })[];
  23. const withCumulative = (cumulative: boolean) => (cases: WithNumericDate): WithCumulative => {
  24. const valuesWithCumulative = cases.reduce(
  25. (last: WithCumulative, { date, xValue, value }): WithCumulative => [
  26. ...last,
  27. {
  28. date,
  29. xValue,
  30. value,
  31. valueCumulative: (value || 0) + (last[last.length - 1]?.valueCumulative || 0),
  32. },
  33. ],
  34. [],
  35. );
  36. if (cumulative) {
  37. return valuesWithCumulative.map(({ valueCumulative, value, ...rest }) => ({
  38. ...rest,
  39. value: valueCumulative,
  40. valueCumulative,
  41. }));
  42. }
  43. return valuesWithCumulative;
  44. };
  45. const logArray = (values: number[]): number[] => values.map(value => Math.log(value));
  46. const withExponentialRegression = (cumulative: boolean, regressionBuffer: number) => (
  47. cases: WithCumulative,
  48. ): CountryDataPoint[] => {
  49. const startIndex = cases.findIndex(
  50. ({ valueCumulative = 0 }) => valueCumulative >= regressionStart,
  51. );
  52. if (startIndex === -1 || startIndex >= cases.length - regressionBuffer) {
  53. return cases;
  54. }
  55. const casesToRegress = cases.slice(startIndex, cases.length - regressionBuffer);
  56. // It's assumed that the input here is ordered by date ascending
  57. const minDate: Date = new Date(casesToRegress[0].date);
  58. const xSeries = casesToRegress.map(({ date }) => 1 + differenceInDays(new Date(date), minDate));
  59. const xBar = mean(xSeries);
  60. const xVariance = mean(xSeries.map(value => value ** 2)) - xBar ** 2;
  61. const ySeries = logArray(
  62. cumulative
  63. ? casesToRegress.map(({ valueCumulative = 0 }) => valueCumulative)
  64. : casesToRegress.map(({ value = 0 }) => value),
  65. );
  66. const yBar = mean(ySeries);
  67. const covariance =
  68. xSeries.reduce((last, value, index) => last + (value - xBar) * (ySeries[index] - yBar), 0) /
  69. xSeries.length;
  70. const slope = covariance / xVariance;
  71. const intercept = yBar - slope * xBar;
  72. const regressionAtDate = (date: Date): number =>
  73. Math.exp(slope * differenceInDays(date, minDate) + intercept);
  74. const lastDate = cases[cases.length - 1].date;
  75. const future: CountryDataPoint[] = new Array(futureDays)
  76. .fill(0)
  77. .map((_, index) => addDays(lastDate, index + 1))
  78. .map(date => ({ date, xValue: date.getTime() }));
  79. return [...cases, ...future].map(({ date, ...rest }) => ({
  80. date,
  81. regression: regressionAtDate(date),
  82. ...rest,
  83. }));
  84. };
  85. function processCountryCases(
  86. cases: Cases,
  87. cumulative: boolean,
  88. regressionBuffer: number,
  89. ): CountryDataPoint[] {
  90. if (!cases.length) {
  91. return [];
  92. }
  93. return compose<Cases, WithNumericDate, WithCumulative, CountryDataPoint[]>(
  94. withExponentialRegression(cumulative, regressionBuffer),
  95. withCumulative(cumulative),
  96. withNumericDate,
  97. )(cases);
  98. }
  99. function combineData(items: Data[]): Data {
  100. const groups: {
  101. [xValue: string]: Data;
  102. } = groupBy(flatten(items), 'xValue');
  103. return Object.keys(groups)
  104. .sort((timeA: string, timeB: string) => Number(timeA) - Number(timeB))
  105. .map(key =>
  106. groups[key].reduce(
  107. (last: DataPoint, item: DataPoint): DataPoint => ({
  108. ...last,
  109. value: {
  110. ...last.value,
  111. ...item.value,
  112. },
  113. regression: {
  114. ...last.regression,
  115. ...item.regression,
  116. },
  117. }),
  118. {
  119. date: groups[key][0].date,
  120. xValue: groups[key][0].xValue,
  121. value: {},
  122. regression: {},
  123. },
  124. ),
  125. );
  126. }
  127. function fillData(countryCases: CountryCases): CountryCases {
  128. const times: number[] = Array.from(
  129. new Set(
  130. flatten(
  131. countryCases.map(({ dataSource: { cases } }) => cases.map(({ date }) => date.getTime())),
  132. ),
  133. ),
  134. ).sort((timeA, timeB) => timeA - timeB);
  135. return countryCases.map(
  136. ({ country, dataSource }): CountryCase => ({
  137. country,
  138. dataSource: {
  139. ...dataSource,
  140. cases: times.reduce((last: Cases, time: number): Cases => {
  141. const matchingCase = dataSource.cases.find(({ date }) => date.getTime() === time);
  142. if (matchingCase) {
  143. return [...last, matchingCase];
  144. }
  145. return [
  146. ...last,
  147. {
  148. date: new Date(time),
  149. value: 0,
  150. },
  151. ];
  152. }, []),
  153. },
  154. }),
  155. );
  156. }
  157. export function processCases(
  158. countryCases: CountryCases,
  159. cumulative = true,
  160. regressionBuffer = 0, // Don't count the last X days into the regression fit
  161. ): Data {
  162. const filledData = fillData(countryCases);
  163. const data = filledData.map(({ country, dataSource: { cases } }) =>
  164. processCountryCases(cases, cumulative, regressionBuffer).map(
  165. ({ date, xValue, value, regression }) => ({
  166. date,
  167. xValue,
  168. value: {
  169. [country]: value,
  170. },
  171. regression: {
  172. [country]: regression,
  173. },
  174. }),
  175. ),
  176. );
  177. return combineData(data);
  178. }