import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class FlinkReadTextFile {

    public static void main(String[] args) throws Exception {
        // Create a local execution environment for the batch (DataSet) API
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

        // Read the source file line by line
        DataSet<String> data = env.readTextFile("file:///Users/***/Documents/test.txt");

        // Keep only the lines that start with "五芳斋美" and write them to the output path
        data.filter(new FilterFunction<String>() {
                @Override
                public boolean filter(String value) throws Exception {
                    return value.startsWith("五芳斋美");
                }
            })
            .writeAsText("file:///Users/***/Documents/test01.txt");

        // writeAsText is a lazy sink; execute() actually runs the job
        JobExecutionResult res = env.execute();
    }
}
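Note that nothing is written until env.execute() is called. Also, createLocalEnvironment() defaults to a parallelism equal to the number of CPU cores, so the output path typically becomes a directory containing one file per parallel task; calling env.setParallelism(1) on the environment (or setParallelism(1) on the sink) before executing usually produces a single output file instead.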
3. Example
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._

object SocketWindowWordCount {

  /** Main program method */
  def main(args: Array[String]): Unit = {

    // the port to connect to (hardcoded to 9000 here; it could also be parsed from the arguments)
    // val port: Int = try {
    //   ParameterTool.fromArgs(args).getInt("port")
    // } catch {
    //   case e: Exception => {
    //     System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'")
    //     return
    //   }
    // }

    // get the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // get input data by connecting to the socket
    val text = env.socketTextStream("localhost", 9000, '\n')

    // parse the data, group it, window it, and aggregate the counts
    val windowCounts = text
      .flatMap { w => w.split("\\s") }
      .map { w => WordWithCount(w, 1) }
      .keyBy("word")
      .timeWindow(Time.seconds(5), Time.seconds(1))
      .sum("count")

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1)

    env.execute("Socket Window WordCount")
  }

  // Data type for words with count
  case class WordWithCount(word: String, count: Long)
}
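To try this example, first start a simple text server on port 9000 (for example with netcat: nc -lk 9000 on Linux/macOS), then run the program and type words into the netcat terminal. Roughly once per second the job prints, for each word seen, its count over the preceding 5-second sliding window. The field-name expressions in keyBy("word") and sum("count") work because WordWithCount is a case class whose field names match.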