题目:编程实现一个类“MyFSDataInputStream”,该类继承“org.apache.hadoop.fs.FSDataInputStream”,要求如下:实现按行读取HDFS中指定文件的方法“readLine()”,如果读到文件末尾,则返回空,否则返回文件一行的文本。
1.import java.io.IOException;2.import org.apache.hadoop.conf.Configuration;3.import org.apache.hadoop.fs.FileSystem;4.import org.apache.hadoop.fs.FSDataInputStream;5.import org.apache.hadoop.fs.Path;6.7.public class MyFSDataInputStream extends FSDataInputStream {8. private static final int BUFFER_SIZE = 4096;9. private byte[] buffer;10. private int pos;11. private int end;12. private boolean eof;13.14. public MyFSDataInputStream(FileSystem fs, Path file) throws IOException {15. super(fs.open(file));16. buffer = new byte[BUFFER_SIZE];17. pos = 0;18. end = 0;19. eof = false;20. }21.22. public String readNextLine() throws IOException {23. StringBuilder sb = new StringBuilder();24. int b = -1;25. boolean found = false;26.27. while (!eof && !found) {28. if (pos >= end) {29. fillBuffer();30. }31. if (end == -1) {32. eof = true;33. }34. while (pos < end) {35. b = buffer[pos++];36. if (b == '\r' || b == '\n') {37. found = true;38. break;39. }40. sb.append((char) b);41. }42. }43.44. if (sb.length() == 0 && eof) {45. return "";46. } else {47. return sb.toString();48. }49. }50.51. private void fillBuffer() throws IOException {52. end = in.read(buffer);53. pos = 0;54. }55.56. public static void main(String[] args) throws IOException {57. if (args.length < 1) {58. System.err.println("Usage: MyFSDataInputStream ");59. System.exit(1);60. }61.62. String filePath = args[0];63.64. Configuration conf = new Configuration();65. Path path = new Path(filePath);66. FileSystem fs = FileSystem.get(path.toUri(), conf);67.68. MyFSDataInputStream in = new MyFSDataInputStream(fs, path);69. String line;70. while ((line = in.readNextLine()) != null) {71. System.out.println(line);72. }73. in.close();74. }75.}
命令如下:
cd /usr/local/hadoop/share
ls
vim MyFSDataInputStream.java
javac -classpath /usr/local/hadoop/share/hadoop/common/hadoop-common-2.7.4.jar MyFSDataInputStream.java
ls
HADOOP_CLASSPATH=. hadoop MyFSDataInputStream <HDFS文件路径>
运行结果:
来源地址:https://blog.csdn.net/weixin_46390041/article/details/129805188