library(sqldf)
data <- read.csv("data.csv")
head(data)
zdata <- sqldf("select * from data t where (select count(1) from data where V2 = t.V2 and V3 = t.V3)>2")
print(zdata)
方法一: 使用的SQL语句为:
select * from data t where (select count(1) from data where V2=t.V2 and V3=t.V3)>2 得到晒选的效果如下所示:
方法二:提取查询
v1=c(1,21,5,5,8,8)
v2=c(2,5,2,8,2,2)
v3=c(3,6,3,9,3,3)
v4=c(4,7,3,4,7,4)
data=data.frame(v1,v2,v3,v4)
head(data)
data v 2 v 5 = p a s t e ( d a t a v2v5=paste(data v2v5=paste(datav2,data v 5 ) a = t a b l e ( d a t a v5) a=table(data v5)a=table(datav2v5)
tarid=names(a)[a>2]
res=data[data$v2v5%in%tarid,]
print(res)
方法三:条件筛选subset
a1 <- c("M","b","b","c")
b1 <- c(1,2,3,4)
da <- cbind(a1,b1)
da <- as.data.frame(da)
print(da)
mz <- subset(da,a1=='b'&b1==2)
print(mz)
方法四:which条件查询
a1 <- c("M","b","b","c")
b1 <- c(1,2,3,4)
da <- cbind(a1,b1)
da <- as.data.frame(da)
print(da)
zda<-da[which(a1 == 'b'),]
print(zda)
install.packages("sqldf")
library(sqldf)
AA=read.csv("C:\\Users\\Administrator\\Desktop\\data.csv")
str(AA)
data1=sqldf("select 门店,日期,count(1) as 当日订单数 from AA group by 日期,门店")
2.对每一天中,不同门店的订单数量进行判断,找出每天门店的订单数量大于三的门店。
data2=sqldf("select 门店,日期,count(1) as 当日订单数 from AA group by 日期,门店 having 当日订单数>=3")
data3=sqldf("select 门店,count(1) as 月订单数量大于3的天数 from (select 门店,日期,count(1) as 当日订单数 from AA group by 日期,门店 having 当日订单数>=3) group by 门店 having 月订单数量大于3的天数>=25")
案例二: 数据筛选合并 原始数据如下:
key time
a 1月
a 1月
a 2月
b 1月
b 1月
b 1月 合并数据如下:
key time count
a 1月 2
a 2月 1
b 1月 3
实现代码如下:
key=c("a","a","a","b","b","b")
time=c("1月","1月","2月","1月","1月","1月")
data=cbind(key,time)
data=as.data.frame(data)
data
library(sqldf)
zdata=sqldf("select key,time,count(1) from data group by key,time")
print(zdata)