geom_annotate()
The lab instructions can be found here; we will work through its contents together via Webex. You will use this RMarkdown file as your workspace and final document. Don’t forget to update the “author” metadata field at the top of the file!
# Read the cleaned artworks CSV from the project's data/ folder into `moma`.
moma <- here::here("data", "artworks-cleaned.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
## .default = col_double(),
## title = col_character(),
## artist = col_character(),
## artist_bio = col_character(),
## artist_gender = col_character(),
## circumference_cm = col_logical(),
## diameter_cm = col_logical(),
## length_cm = col_logical(),
## seat_height_cm = col_logical(),
## purchase = col_logical(),
## gift = col_logical(),
## exchange = col_logical(),
## classification = col_character(),
## department = col_character()
## )
## See spec(...) for full column specifications.
# Column-by-column preview of the table (names, types, first values).
moma %>%
  glimpse()
## Observations: 2,253
## Variables: 23
## $ title <chr> "Rope and People, I", "Fire in the Evening", "Portr…
## $ artist <chr> "Joan Miró", "Paul Klee", "Paul Klee", "Pablo Picas…
## $ artist_bio <chr> "(Spanish, 1893–1983)", "(German, born Switzerland.…
## $ artist_birth_year <dbl> 1893, 1879, 1879, 1881, 1880, 1879, 1943, 1880, 183…
## $ artist_death_year <dbl> 1983, 1940, 1940, 1973, 1946, 1953, 1977, 1950, 190…
## $ num_artists <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ n_female_artists <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ n_male_artists <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ artist_gender <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Ma…
## $ year_acquired <dbl> 1936, 1970, 1966, 1955, 1939, 1968, 1997, 1931, 193…
## $ year_created <dbl> 1935, 1929, 1927, 1919, 1925, 1919, 1970, 1929, 188…
## $ circumference_cm <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ depth_cm <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ diameter_cm <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ height_cm <dbl> 104.8, 33.8, 60.3, 215.9, 50.8, 129.2, 200.0, 54.6,…
## $ length_cm <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ width_cm <dbl> 74.6, 33.3, 36.8, 78.7, 54.0, 89.9, 200.0, 38.1, 96…
## $ seat_height_cm <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ purchase <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ gift <lgl> TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, …
## $ exchange <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FAL…
## $ classification <chr> "Painting", "Painting", "Painting", "Painting", "Pa…
## $ department <chr> "Painting & Sculpture", "Painting & Sculpture", "Pa…
# List artist, title and acquisition year, earliest acquisitions first.
moma %>%
  arrange(year_acquired) %>%
  select(artist, title, year_acquired)
## # A tibble: 2,253 x 3
## artist title year_acquired
## <chr> <chr> <dbl>
## 1 Edward Hopper House by the Railroad 1930
## 2 Bernard Karfiol Seated Nude 1930
## 3 Pierre Roy Daylight Savings Time 1931
## 4 Preston Dickins… Plums on a Plate 1931
## 5 Otto Dix Dr. Mayer-Hermann 1932
## 6 Paul Cézanne The Bather 1934
## 7 Paul Cézanne Pines and Rocks (Fontainebleau?) 1934
## 8 Paul Cézanne Still Life with Ginger Jar, Sugar Bowl, and O… 1934
## 9 Paul Cézanne Still Life with Apples 1934
## 10 Arthur B. Davies Italian Landscape 1934
## # … with 2,243 more rows
##Edward Hopper House by the Railroad 1930
# List artist, title and creation year, oldest works first.
moma %>%
  arrange(year_created) %>%
  select(artist, title, year_created)
## # A tibble: 2,253 x 3
## artist title year_created
## <chr> <chr> <dbl>
## 1 Odilon Redon Landscape at Daybreak 1872
## 2 Odilon Redon Apache (Man on Horseback) 1875
## 3 Odilon Redon Apache (Man on Horseback II) 1875
## 4 Odilon Redon Fishing Boat 1875
## 5 Odilon Redon Rocky Peak 1875
## 6 Odilon Redon The Rocky Slope 1875
## 7 Odilon Redon Landscape with Rocks, near Royan 1875
## 8 Paul Cézanne Still Life with Fruit Dish 1879
## 9 Paul Cézanne L'Estaque 1879
## 10 Claude Monet On the Cliff at Pourville, Clear Weather 1882
## # … with 2,243 more rows
# One row per unique value of `artist`.
distinct(moma, artist)
## # A tibble: 989 x 1
## artist
## <chr>
## 1 Joan Miró
## 2 Paul Klee
## 3 Pablo Picasso
## 4 Arthur Dove
## 5 Francis Picabia
## 6 Blinky Palermo
## 7 Pierre Roy
## 8 Paul Cézanne
## 9 Enrico Prampolini
## 10 Jankel Adler
## # … with 979 more rows
# Number of rows per artist, largest counts first.
moma %>%
  count(artist) %>%
  arrange(desc(n))
## # A tibble: 989 x 2
## artist n
## <chr> <int>
## 1 Pablo Picasso 55
## 2 Henri Matisse 32
## 3 On Kawara 32
## 4 Jacob Lawrence 30
## 5 Batiste Madalena 25
## 6 Jean Dubuffet 25
## 7 Odilon Redon 25
## 8 Ben Vautier 24
## 9 Frank Stella 23
## 10 Philip Guston 23
## # … with 979 more rows
# How many different artists appear: collapse to one row per artist,
# then count those rows.
moma %>%
  distinct(artist) %>%
  count()
## # A tibble: 1 x 1
## n
## <int>
## 1 989
# Number of rows for each value of `artist_gender` (NA is kept as a group).
count(moma, artist_gender)
## # A tibble: 3 x 2
## artist_gender n
## <chr> <int>
## 1 Female 252
## 2 Male 1991
## 3 <NA> 10
# Rows per artist among rows whose `artist_gender` is "Female",
# largest counts first.
moma %>%
  filter(artist_gender == "Female") %>%
  count(artist_gender, artist, sort = TRUE)
## # A tibble: 143 x 3
## artist_gender artist n
## <chr> <chr> <int>
## 1 Female Sherrie Levine 12
## 2 Female Agnes Martin 9
## 3 Female Elizabeth Murray 8
## 4 Female Susan Rothenberg 8
## 5 Female Joan Mitchell 6
## 6 Female Loren MacIver 6
## 7 Female R. H. Quaytman 6
## 8 Female Helen Frankenthaler 5
## 9 Female Georgia O'Keeffe 4
## 10 Female Lynette Yiadom-Boakye 4
## # … with 133 more rows
# Rows per artist among rows whose `artist_gender` is "Male",
# largest counts first.
moma %>%
  filter(artist_gender == "Male") %>%
  count(artist_gender, artist, sort = TRUE)
## # A tibble: 837 x 3
## artist_gender artist n
## <chr> <chr> <int>
## 1 Male Pablo Picasso 55
## 2 Male Henri Matisse 32
## 3 Male On Kawara 32
## 4 Male Jacob Lawrence 30
## 5 Male Batiste Madalena 25
## 6 Male Jean Dubuffet 25
## 7 Male Odilon Redon 25
## 8 Male Ben Vautier 24
## 9 Male Frank Stella 23
## 10 Male Philip Guston 23
## # … with 827 more rows
# Number of rows per acquisition year, busiest years first.
moma %>%
  count(year_acquired) %>%
  arrange(desc(n))
## # A tibble: 88 x 2
## year_acquired n
## <dbl> <int>
## 1 1985 86
## 2 1942 71
## 3 1979 71
## 4 1991 67
## 5 2005 67
## 6 1967 65
## 7 2008 55
## 8 1961 45
## 9 1969 45
## 10 1956 42
## # … with 78 more rows
# Number of rows per creation year, most frequent years first.
count(moma, year_created, sort = TRUE)
## # A tibble: 139 x 2
## year_created n
## <dbl> <int>
## 1 1977 57
## 2 1940 56
## 3 1964 56
## 4 1961 50
## 5 1962 49
## 6 1963 44
## 7 1959 42
## 8 1968 40
## 9 1960 39
## 10 1914 37
## # … with 129 more rows
# Rows with exactly one artist and one female artist, ordered by the year
# the work was acquired.
moma %>%
  filter(num_artists == 1, n_female_artists == 1) %>%
  arrange(year_acquired) %>%
  select(title, artist, year_acquired, year_created)
## # A tibble: 252 x 4
## title artist year_acquired year_created
## <chr> <chr> <dbl> <dbl>
## 1 Landscape, 47 Natalia Goncharova 1937 1912
## 2 Shack Loren MacIver 1938 1934
## 3 Hopscotch Loren MacIver 1940 1940
## 4 Shadows with Painting Irene Rice Pereira 1941 1940
## 5 Figure Varvara Stepanova 1941 1921
## 6 Still Life in Red Amelia Peláez Del Ca… 1942 1938
## 7 White Lines Irene Rice Pereira 1942 1942
## 8 Musical Squash Maud Morgan 1942 1942
## 9 Desolation Raquel Forner 1942 1942
## 10 Self-Portrait with Cropped … Frida Kahlo 1943 1940
## # … with 242 more rows
# Rows with exactly one artist and one female artist, ordered by the year
# the work was created.
moma %>%
  filter(num_artists == 1, n_female_artists == 1) %>%
  arrange(year_created) %>%
  select(title, artist, year_acquired, year_created)
## # A tibble: 252 x 4
## title artist year_acquired year_created
## <chr> <chr> <dbl> <dbl>
## 1 Self-Portrait with Two Flowers in… Paula Modersoh… 2017 1907
## 2 Girl with Bare Shoulders Gwen John 1958 1909
## 3 Girl Reading at a Window Gwen John 1971 1911
## 4 Landscape, 47 Natalia Goncha… 1937 1912
## 5 Cubist Nude Alexandra Exter 1991 1912
## 6 Rayonism, Blue-Green Forest Natalia Goncha… 1985 1913
## 7 The Factory and the Bridge Olga Rozanova 1985 1913
## 8 Subject from a Dyer's Shop Lyubov Popova 1985 1914
## 9 Portuguese Market Sonia Delaunay… 1955 1915
## 10 Girl with a Blue Scarf Gwen John 1963 1915
## # … with 242 more rows
# Creation year vs acquisition year for every row of `moma`. Points are
# semi-transparent so overlapping works read as darker areas; na.rm = TRUE
# drops rows with a missing year without a warning.
ggplot(moma, aes(year_created, year_acquired)) +
  geom_point(alpha = 0.1, na.rm = TRUE) +
  # One y = x reference line (intercept 0, slope 1): a point on the line was
  # acquired in the year it was made.
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(x = "Year Painted", y = "Year Acquired") +
  ggtitle("MoMA Keeps Its Collection Current")

# The same scatter, split into one panel per value of `artist_gender`.
ggplot(moma, aes(year_created, year_acquired)) +
  geom_point(alpha = 0.1, na.rm = TRUE) +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(x = "Year Painted", y = "Year Acquired") +
  ggtitle("MoMA Keeps Its Collection Current") +
  facet_wrap(~artist_gender)

# Keep rows under 600 cm tall and 760 cm wide, then label each one by its
# height-to-width ratio.
moma_dim <- moma %>%
  filter(height_cm < 600, width_cm < 760) %>%
  mutate(
    hw_ratio = height_cm / width_cm,
    hw_cat = case_when(
      hw_ratio > 1 ~ "taller than wide",
      hw_ratio < 1 ~ "wider than tall",
      hw_ratio == 1 ~ "perfect square"
    )
  )

library(ggthemes)
# Width vs height of every row in `moma_dim`, coloured by `hw_cat`.
ggplot(moma_dim, aes(x = width_cm, y = height_cm, colour = hw_cat)) +
  geom_point(alpha = .5) +
  ggtitle("MoMA Paintings, Tall and Wide") +
  # Colours are named by category so the mapping does not depend on the
  # alphabetical order of the categories present in the data.
  scale_colour_manual(
    name = "",
    values = c(
      "perfect square" = "gray50",
      "taller than wide" = "#ee5863",
      "wider than tall" = "#6999cd"
    )
  ) +
  theme_fivethirtyeight() +
  # NOTE(review): presumably re-enables the axis titles that the
  # fivethirtyeight theme hides -- confirm against ggthemes.
  theme(axis.title = element_text()) +
  labs(x = "Width", y = "Height")
## Different colors
library(ggthemes)
# Width vs height of every row in `moma_dim`, coloured by `hw_cat`.
ggplot(moma_dim, aes(x = width_cm, y = height_cm, colour = hw_cat)) +
  geom_point(alpha = .5) +
  ggtitle("MoMA Paintings, Tall and Wide") +
  # Colours are named by category so the mapping does not depend on the
  # alphabetical order of the categories present in the data.
  scale_colour_manual(
    name = "",
    values = c(
      "perfect square" = "gray50",
      "taller than wide" = "#ee5863",
      "wider than tall" = "#6999cd"
    )
  ) +
  theme_fivethirtyeight() +
  theme(axis.title = element_text()) +
  labs(x = "Width", y = "Height")
## geom_annotate()
library(ggthemes)
# Width vs height of every row in `moma_dim`, coloured by `hw_cat`, with the
# legend replaced by text labels drawn directly on the plot.
ggplot(moma_dim, aes(x = width_cm, y = height_cm, colour = hw_cat)) +
  geom_point(alpha = .5, show.legend = FALSE) +
  ggtitle("MoMA Paintings, Tall and Wide") +
  # Colours are named by category so each annotation below is guaranteed to
  # use the same colour as the points it describes.
  scale_colour_manual(
    name = "",
    values = c(
      "perfect square" = "gray50",
      "taller than wide" = "#ee5863",
      "wider than tall" = "#6999cd"
    )
  ) +
  theme_fivethirtyeight() +
  theme(axis.title = element_text()) +
  labs(x = "Width", y = "Height") +
  # Right-aligned label for the "taller than wide" points.
  annotate(
    x = 200, y = 380, geom = "text",
    label = "Taller than\nWide", color = "#ee5863",
    size = 5, family = "Lato", hjust = 1, fontface = 2
  ) +
  # Left-aligned label for the "wider than tall" points.
  annotate(
    x = 375, y = 100, geom = "text",
    label = "Wider than\nTall", color = "#6999cd",
    size = 5, family = "Lato", hjust = 0, fontface = 2
  )

For this exercise I’ve decided to look for a correlation between the age at which the artist made his/her masterpiece and the remainder of their life. To my surprise, despite some significant correlation of younger age = longer life, there were some really tragic stories of very young artists dying prematurely…
# For every row, compute the artist's age when the work was created and the
# number of years between the work and the artist's death, then bucket the
# latter into four labels. Rows with a missing death year get NA.
moma_age <- moma %>%
  mutate(
    artist_age = year_created - artist_birth_year,
    artist_life = artist_death_year - year_created
  ) %>%
  mutate(
    life_expectancy = case_when(
      artist_life < 1 ~ "short life",
      artist_life < 10 ~ "decent life",
      artist_life < 26 ~ "good life",
      artist_life > 25 ~ "long life"
    )
  )
library(ggthemes)
# Artist's age at creation vs years of life remaining, coloured by the
# `life_expectancy` bucket computed in `moma_age`.
ggplot(
  moma_age,
  aes(x = artist_age, y = artist_life, color = life_expectancy)
) +
  geom_point(alpha = .5, show.legend = FALSE) +
  ggtitle("MoMA Paintings, Artist Age vs Life Left") +
  # Colours are named by bucket so the mapping stays fixed even if a bucket
  # is absent from the data.
  scale_colour_manual(
    name = "",
    values = c(
      "decent life" = "gray50",
      "good life" = "#00b0f6",
      "long life" = "#ee5863",
      "short life" = "#6999cd"
    )
  ) +
  theme_fivethirtyeight() +
  theme(axis.title = element_text()) +
  labs(x = "Artist's Age", y = "Life Left") +
  # Label drawn in the "long life" colour.
  annotate(
    x = 50, y = 65, geom = "text",
    label = "Early Start", color = "#ee5863",
    size = 5, family = "Lato", hjust = 1, fontface = 2
  ) +
  # Label drawn in the "short life" colour.
  annotate(
    x = 75, y = 25, geom = "text",
    label = "Late Bloomer", color = "#6999cd",
    size = 5, family = "Lato", hjust = 0, fontface = 2
  )
## Warning: Removed 633 rows containing missing values (geom_point).