---
author: Sam White
toc-title: Contents
toc-depth: 5
toc-location: left
title: Plotting Fun - Lab Meeting Task
date: '2023-11-30'
draft: false
engine: knitr
categories:
  - plotting
  - R
---

For today's lab meeting, Steven tasked us with the following on Wednesday:

> For lab meeting Thursday the topic is "Plots". Before the meeting make sure to create a notebook post where you show-off a plot of something you did in November, including showing the code used to generate the plot. :bar_chart:
- Drop the url to the notebook post below.

So, drawing a blank on what to plot, I opted to visualize commit history for the [Coral E5 deep dive repo](https://github.com/urol-e5/deep-dive) (GitHub repo).

Let's start with just trying to plot commits per day.

# Initial setup

## Load libraries

```{r load-libraries}
#| code-fold: false
#| echo: false
library(ggplot2)
library(dplyr)
```

## Set Git Repo Path

```{r set-git-repo-path}
#| code-fold: false

# Path to the local clone; the trailing slash is required because ".git" is
# appended directly to this string when the git command is built.
repo_path <- "/home/sam/gitrepos/deep-dive/"
```

## Set date range

```{r set-date-range}
#| code-fold: false

# Set date range
start_date <- as.Date("2023-11-01")
end_date <- as.Date("2023-11-30")

# Captures all days in range
all_days <- data.frame(Date = seq(start_date, end_date, by = "day"))

head(all_days)
```

## Define the Git command to get commit dates using `system()`

```{r set-git-command}
#| code-fold: false

# When `--after`/`--before` are given a bare date, git fills in the missing
# time-of-day with the current clock time, so the commits returned for the
# first and last day would depend on when this notebook is rendered.
# Pin explicit start-of-day / end-of-day times to capture both days in full.
after_stamp <- format(start_date, "%Y-%m-%dT00:00:00")
before_stamp <- format(end_date, "%Y-%m-%dT23:59:59")

# `--date=short` + `%ad` prints one YYYY-MM-DD author date per commit.
# The git dir is shell-quoted so a path containing spaces still works.
git_command <- paste0(
  "git --git-dir=", shQuote(paste0(repo_path, ".git")),
  " log --pretty=format:%ad --date=short",
  " --after=", after_stamp,
  " --before=", before_stamp,
  " --all"
)

print(git_command)
```

# Data manipulation

## Execute git command

```{r run-git-command}
#| code-fold: false

# `intern = TRUE` returns git's stdout as a character vector, one line per commit.
commit_info <- system(git_command, intern = TRUE)

str(commit_info)
```

## Convert to Dates structure

```{r convert-commit-dates}
#| code-fold: false

# Strip any stray single quotes before parsing the YYYY-MM-DD strings.
commit_dates <- as.Date(gsub("'", "", commit_info))

str(commit_dates)
```

## Count number of commits

This will create a two-column dataframe (Date, Commits) and replace the `NA`s (dates with no commits) with `0`.

```{r count-commits}
#| code-fold: false

# This counts number of commits on each date.
commit_table <- table(commit_dates)

str(commit_table)

# Left-join the per-date counts onto the full list of days so that days
# without any commits are kept (with `NA` in the Commits column).
all_days <- merge(
  all_days,
  data.frame(
    Date = as.Date(names(commit_table)),
    Commits = as.numeric(commit_table)
  ),
  all.x = TRUE,
  by = "Date"
)

# Assigns value of `0` to all NAs
all_days$Commits[is.na(all_days$Commits)] <- 0

head(all_days)
```

# Plot all commits per day

```{r plot-all-commits-per-day}
#| code-fold: false
#| label: bar-plot-all-commits
#| fig-cap: "Bar plot of all November 2023 commits"

ggplot(all_days, aes(x = Date, y = Commits)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Commits per Day in Nov 2023",
    x = "Date",
    y = "Number of Commits"
  ) +
  theme_minimal()
```

Now, let's try to look at commits per day by author...

# Commits per day per author

## Create a data frame with all days in November

```{r df-all-days-november}
#| code-fold: false

# Reset `all_days` to a bare list of dates; the per-day totals from the
# previous section are no longer needed.
all_days <- data.frame(Date = seq(start_date, end_date, by = "day"))
```

## New git command

This adds author to format output

```{r update-git-command}
#| code-fold: false

# Emits one "<YYYY-MM-DD> <author name>" line per commit. No date filter is
# applied here; the merge with `all_days` below restricts rows to the range.
# The git dir is shell-quoted so a path containing spaces still works.
git_command <- paste0(
  "git --git-dir=", shQuote(paste0(repo_path, ".git")),
  " log --date=short --all --format='%ad %an'"
)

commit_info <- system(git_command, intern = TRUE)
```

## Split the commit information into date and author

```{r split-commit-info}
#| code-fold: false

# Split each line at the FIRST space only. Author names usually contain
# spaces themselves ("Sam White"), so splitting on every space and taking the
# second piece would keep just the first word of the name and merge authors
# who share a first name.
commit_info_split <- regmatches(
  commit_info,
  regexpr(" ", commit_info, fixed = TRUE),
  invert = TRUE
)

# `vapply()` guarantees a character vector even when there are no commits.
commit_dates <- as.Date(vapply(commit_info_split, `[[`, character(1), 1L))

commit_authors <- vapply(commit_info_split, `[[`, character(1), 2L)

str(commit_dates)

str(commit_authors)
```

## Create data frame with commits and authors

```{r df-with-authors-dates}
#| code-fold: false

# Create a data frame with commit information
commit_data <- data.frame(Date = commit_dates, Author = commit_authors)

head(commit_data)

str(commit_data)
```

### Update dataframe to include all days in November

```{r df-include-all-days-in-Nov}
#| code-fold: false

# Left join: one row per commit, plus a single row with `Author = NA` for
# every day in the range that had no commits.
all_days <- merge(all_days, commit_data, all.x = TRUE, by = "Date")

str(all_days)
```

### Update dataframe with commit counts per author

```{r count-commits-by-author-per-day}
#| code-fold: false

# Count only rows that carry a real author. Days without commits are present
# as a single `Author = NA` placeholder row from the merge above; counting
# rows with `n()` would report those days as having 1 commit.
# `.groups = "drop"` returns an ungrouped result and silences the regrouping
# message from `summarise()`.
all_days <- all_days %>%
  group_by(Date, Author) %>%
  summarise(Commits = sum(!is.na(Author)), .groups = "drop")

str(all_days)
```

# Plot commits per day per author

```{r plot-commits-per-day-per-author}
#| code-fold: false
#| label: stacked-bar-plot-commits-by-author
#| fig-cap: "Stacked bar plot of November 2023 commits per day per author"

# `na.translate = FALSE` keeps the zero-commit placeholder rows (Author = NA)
# out of the legend; they still anchor the x-axis to the full date range.
ggplot(all_days, aes(x = Date, y = Commits, fill = Author)) +
  geom_col() +
  labs(
    title = "Commits by Author per Day in Nov 2023",
    x = "Date",
    y = "Number of Commits"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2", na.translate = FALSE)
```