The R
Language
A powerful open-source language for statistical computing, data analysis, and visualization — loved by statisticians, researchers, and data scientists worldwide.
What is R?
R is a free, open-source programming language and software environment for statistical computing, data analysis, and graphical visualization. It was created by Ross Ihaka and Robert Gentleman at the University of Auckland, New Zealand.
Originally derived from the S programming language, R has grown into one of the world's most popular languages for data science, used extensively in academia, research, finance, healthcare, and the tech industry.
R runs on Windows, macOS, and Linux, and it is supported by an active community that has contributed over 20,000 packages to CRAN (the Comprehensive R Archive Network).
Why Choose R?
R in Action
# R Basics — Variables, Vectors & Functions ----

# Assign variables using <- (= also works, but <- is the convention)
name <- "Data Scientist"
year <- 2024

# Vectors — the fundamental data type in R
scores <- c(85, 92, 78, 96, 88)

# Vectorized operations (no loops needed!)
scaled <- scores * 1.1

# Built-in functions
mean(scores)     # 87.8
sd(scores)       # standard deviation
summary(scores)  # min, max, quartiles

# Define your own function
#
# greet() builds a welcome message.
#   name: the name to greet.
#   lang: the language named in the message; defaults to "R".
# Returns the pieces joined by paste(), i.e. separated by single spaces.
greet <- function(name, lang = "R") {
  paste("Hello,", name, "- Welcome to", lang)
}

greet("Alice")  # "Hello, Alice - Welcome to R"

# Data frame — R's table-like structure
df <- data.frame(
  name = c("Alice", "Bob", "Carol"),
  score = c(90, 85, 92),
  pass = c(TRUE, TRUE, TRUE)
)
# Data Visualization with ggplot2 ----
library(ggplot2)

# Basic scatter plot: weight vs. mpg, coloured by cylinder count, with a
# linear fit per colour group. The formula is spelled out so the fit does not
# depend on geom_smooth()'s default.
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Car Weight vs Fuel Efficiency",
    x = "Weight (1000 lbs)",
    y = "Miles per Gallon",
    color = "Cylinders"
  ) +
  theme_minimal()

# Bar chart: counts per cut, one dodged bar per clarity level
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Blues") +
  theme_classic()

# Histogram of sepal length, one facet per species
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(binwidth = 0.3, alpha = 0.6, position = "identity") +
  facet_wrap(~Species) +
  theme_bw()
# Statistical Analysis in R ----

# Linear regression
model <- lm(mpg ~ wt + hp + cyl, data = mtcars)
summary(model)  # coefficients, R², p-values

# T-test — comparing two groups
# Seed the RNG so the simulated groups (and the test result) are reproducible.
set.seed(42)
group_a <- rnorm(30, mean = 50, sd = 10)
group_b <- rnorm(30, mean = 55, sd = 10)
t.test(group_a, group_b)

# ANOVA
aov_model <- aov(Sepal.Length ~ Species, data = iris)
summary(aov_model)

# Chi-squared test
# NOTE(review): some cyl/gear cells in mtcars are sparse, so R may warn that
# the chi-squared approximation is unreliable — confirm before relying on the
# p-value.
chisq.test(table(mtcars$cyl, mtcars$gear))

# Logistic regression
logit <- glm(am ~ wt + hp, data = mtcars, family = binomial())
summary(logit)
# Data Manipulation with dplyr (tidyverse) ----
library(dplyr)
library(tidyr)

# Pipe operator: |> chains operations cleanly (native pipe, R >= 4.1)
result <- starwars |>
  filter(!is.na(height), species == "Human") |>
  select(name, height, mass, gender) |>
  mutate(bmi = mass / (height / 100)^2) |>
  arrange(desc(height))

# Group by and summarize
summary_df <- mtcars |>
  group_by(cyl) |>
  summarize(
    avg_mpg = mean(mpg),
    avg_hp = mean(hp),
    n = n()
  )

# Join two data frames
# `orders` and `customers` are placeholders: this snippet does not define
# them, so supply your own data frames sharing a `customer_id` column.
joined <- left_join(orders, customers, by = "customer_id")

# Pivot data from wide to long
# `wide_df` is likewise a placeholder; the columns whose names start with "Q"
# are gathered into `quarter` / `revenue` pairs.
long_df <- wide_df |>
  pivot_longer(
    cols = starts_with("Q"),
    names_to = "quarter",
    values_to = "revenue"
  )

0 Comments