Compare commits

...

2 Commits

Author SHA1 Message Date
fae65591e6 finished ch12.4
All checks were successful
Test Gitea Actions / first (push) Successful in 22s
2025-02-18 15:28:22 -07:00
e0bc88cf97 finished ch12.3 2025-02-18 11:27:40 -07:00
5 changed files with 624 additions and 120 deletions

View File

@ -0,0 +1,37 @@
# name of the workflow.
# this is optional.
name: Test Gitea Actions
# events that will trigger this workflow.
# here, we only have "pull_request", so the workflow will run
# whenever we create a pull request.
# other examples: [push] and [pull_request, push]
on: [push]
# each workflow must have at least one job.
# jobs run in parallel by default (we can change that).
# each job groups together a series of steps to accomplish a purpose.
jobs:
  # name of the job
  first:
    # the platform or OS that the workflow will run on.
    runs-on: ubuntu-latest
    # series of steps to finish the job.
    steps:
      # name of the step.
      # steps run sequentially.
      # this is optional.
      - name: checkout
        # each step can either have "uses" or "run".
        # "uses" runs an action written somewhere other than this workflow,
        # usually from the community.
        # this action checks out the repo code to the runner (instance)
        # running the action
        uses: actions/checkout@v3
      # another step.
      # this step runs a bash (Ubuntu's default shell) command
      - name: list files
        run: ls

View File

@ -27,16 +27,30 @@
"state": {
"type": "markdown",
"state": {
"file": "Test_Organization.md",
"file": "Tests.md",
"mode": "source",
"source": false
},
"icon": "lucide-file",
"title": "Test_Organization"
"title": "Tests"
}
},
{
"id": "ec34d2df2728a299",
"type": "leaf",
"state": {
"type": "markdown",
"state": {
"file": "minigrep/README.md",
"mode": "source",
"source": false
},
"icon": "lucide-file",
"title": "README"
}
}
],
"currentTab": 1
"currentTab": 2
}
],
"direction": "vertical"
@ -179,12 +193,14 @@
"command-palette:Open command palette": false
}
},
"active": "53b36d00b704136e",
"active": "ec34d2df2728a299",
"lastOpenFiles": [
"Test Controls.md",
"Test_Organization.md",
"Tests.md",
"Writing_Tests.md",
"Tests.md",
"minigrep/README.md",
"minigrep/src/lib.rs",
"Test_Organization.md",
"Test Controls.md",
"Traits.md",
"Modules and Use.md",
"Modules.md",
@ -206,7 +222,6 @@
"Primitives.md",
"Project Organization.md",
"README.md",
"Reducing_Code_Duplication.md",
"does_not_compile.svg",
"Untitled.canvas",
"Good and Bad Code/Commenting Pratices",

View File

@ -6,23 +6,23 @@ Rust's speed, safety, single binary output and cross-platform support makes it a
In the simplest use case, `grep` searches a specified file for a specified string.
To do this `grep` takes as its arguments a file path and a string. Then it reads the file, finds lines in that file that match the string argument, and prints those lines
This project will also show along the way how to use the terminal features that many other command line tools use
It will include reading the value of an environment variable to allow the user to configure the behavior of our tool
This project will also go into printing error messages to the standard error console stream (`stderr`) instead of the standard output (`stdout`)
We do that so the user can redirect successful output to a file while still seeing error messages onscreen, for example
One Rust community member, Andrew Gallant, has already created a fully featured, very fast version of `grep` called `ripgrep`
This version will be fairly simple.
## Initial Goal: Accept Command Line Arguments
We can do this when running our program with `cargo run` by passing two hyphens to indicate that the following arguments are for our program rather than for Cargo:
- A string to search for
- A path to a file to search in
@ -31,14 +31,14 @@ Here is an example running
```bash
$ cargo run -- searchstring example-filename.txt
```
The program generated by `cargo new` cannot process arguments we give it.
There are some existing libraries on [crates.io](https://crates.io/) that can help with writing a program that accepts command line arguments.
But since it's a learning opportunity, I (with the help of The Rust Programming Language) will be implementing this capability
### Reading the Argument Values
We will need the `std::env::args` function provided in Rust's std library.
This function returns an iterator of the command line arguments passed to the program
@ -50,7 +50,7 @@ For now the two important details about iterators:
we bring the `std::env` module into scope using the `use` statement so we can use its `args` function
Note that the `std::env::args` function is nested two levels deep in modules.
In cases where the desired function is nested in more than one module, we choose to bring the parent module into scope rather than the function
@ -65,11 +65,11 @@ If your program needs to accept arguments containing invalid Unicode, use `std::
This function produces an iterator that produces `OsString` values instead of `String` values
We chose to use `std::env::args` for simplicity because `OsString` values differ per platform and are more complex to work with than `String` values.
On the first line of `main` we call `env::args` and then `collect` is immediately used to turn the iterator into a vector containing all the values produced by the iterator.
We can use the `collect` function to create many kinds of collections, so we explicitly annotate the type of `args` to specify that we want a vector of strings.
When using `collect` and other functions like it we need to annotate because Rust isn't able to infer the kind of collection desired
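A minimal sketch of what this looks like in `main` (the `dbg!` call is only there to inspect the vector):
```rust
use std::env;

fn main() {
    // collect() can build many kinds of collections, so we annotate Vec<String>
    let args: Vec<String> = env::args().collect();
    dbg!(args);
}
```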
@ -96,16 +96,16 @@ $ cargo run -- needle haystack
```
Notice that the first value in the vector is `"target/debug/minigrep"`; this is the name of our binary.
This matches the behavior of the arguments list in C, letting programs use the name by which they were invoked in their execution.
It's often convenient to have access to the program name in case you want to print it in messages or change the behavior of the program based on what command line alias was used to invoke the program.
For this program we will ignore it and save only the two arguments we need.
### Saving the Argument Values in Variables
The program is currently able to access the values specified as command line args
Now we should save the two arguments in variables so that we can use them later and throughout the program
We do this with `&args[1]` and `&args[2]`
@ -113,7 +113,7 @@ The first arg that `minigrep` takes is the string we are searching for, so we pu
The second arg is the file path, so we put a reference to the second argument in the var `file_path`.
We will temporarily print the values of these variables to prove that the code is working as intended
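A minimal sketch of this step, using the variable names from the text above:
```rust
use std::env;

fn main() {
    let args: Vec<String> = env::args().collect();

    // references to the two arguments we care about; args[0] is the binary name
    let query = &args[1];
    let file_path = &args[2];

    println!("Searching for {query}");
    println!("In file {file_path}");
}
```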
Here is what the output would look like at this point
```
@ -130,7 +130,7 @@ Now we will add functionality to read the specified in the `file_path` argument.
First we will create a sample file to test it with lots of repeating words in a small file
Here is an Emily Dickinson poem that we will use. It will be stored in *poem.txt* at the root level of the project
```
I'm nobody! Who are you?
Are you nobody, too?
@ -160,7 +160,7 @@ fn main() {
The first thing to note is that we bring in `std::fs` to handle files, which is part of the std library
`main` now contains `fs::read_to_string`, which takes the `file_path`; this opens the associated file and returns a value of `std::io::Result<String>` that contains the file's contents
Afterwards we add a temporary `println!` statement that prints the value of `contents` after the file is read so that we can check for correctness.
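Putting these pieces together, a sketch of `main` at this point could look like this (messages mirror the output shown below):
```rust
use std::env;
use std::fs;

fn main() {
    let args: Vec<String> = env::args().collect();

    let query = &args[1];
    let file_path = &args[2];

    println!("Searching for {query}");
    println!("In file {file_path}");

    // read the whole file into a String, panicking with a message on failure
    let contents = fs::read_to_string(file_path)
        .expect("Should have been able to read the file");

    println!("With text:\n{contents}");
}
```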
@ -188,49 +188,49 @@ To an admiring bog!
```
As you can see it works as expected.
But as you can see the `main` function has multiple responsibilities: generally functions are clearer and easier to maintain if each function is responsible for only one idea.
The other problem is that we are not handling errors as well as we could.
These aren't big problems while the program is small, but as the program grows it will be harder to fix them cleanly.
It is good practice to begin refactoring early on when developing because it is easier to refactor smaller amounts of code
## Third Goal: Refactor to Improve Modularity and Error Handling
We have 4 problems to fix
1. Our `main` function now performs two tasks
- Parsing arguments
- reading files
It would be better to separate tasks in the `main` function. As a function gains responsibilities, it becomes more difficult to reason about, harder to test, and harder to change without breaking one of its parts.
It is best to separate functionality so each function is responsible for one task
2. This is partly related to the first problem: although `query` and `file_path` are config variables for our program, variables like `contents` are used to perform the program's logic.
As `main` gets longer, we will need to bring more variables into scope; the more variables we have in scope, the harder it is to track the purpose of each.
It is best to group the config variables into one struct to make their purpose clear.
3. We use `expect` to print an error message when reading the file fails, but the error message just prints `Should have been able to read the file`. It is unclear what the error actually is.
Reading the file can fail in a number of ways: for example, the file could be missing, or we may not have permission to open it. Currently we would print the same error regardless of the situation or type of error.
4. We use `expect` to handle an error, and if the user runs our program without specifying enough arguments, they will get an index out of bounds error from Rust that doesn't clearly explain the problem.
It would be best if all the error-handling code was in one place, so that future maintainers had only one place to consult the code if the error handling logic needed to change.
Having all of the error handling code in one place will also ensure that when we print messages they will make sense to our end users.
### Separation of Concerns for Binary Projects
The organizational problem of allocating responsibility for multiple tasks to the `main` function is common to many binary projects.
As a result the Rust community has developed guidelines for splitting the separate concerns of a binary program when `main` starts getting large.
The process has the following steps:
- Split your program into a *main.rs* file and a *lib.rs* file and move the program's logic to *lib.rs*
- As long as your command line parsing logic is small it can remain in *main.rs*
- When the command line parsing logic starts getting complicated, extract it from *main.rs* and move it to *lib.rs*
The responsibilities that remain in the `main` function after this process should be limited to:
- Calling the command line parsing logic with the argument values
- Setting up any other configuration
- Calling a `run` function in *lib.rs*
@ -242,8 +242,8 @@ Due to not being able to test the `main` function directly, this struct lets you
The small amount of code that remains in *main.rs* will be small enough to verify its correctness by reading it
#### Extracting the Argument Parser
We will first extract the functionality for parsing args into a function that `main` will call, to prepare for moving the command line parsing logic to *src/lib.rs*
Here is how the start of `main` should now look
```rust
@ -265,18 +265,18 @@ fn parse_config(args: &[String]) -> (&str, &str) {
We are still collecting the command line args into a vector, but instead of assigning the argument values at indexes to the variables, we pass the whole vector to the `parse_config` function.
The `parse_config` function then holds the logic that determines which arg goes in which variable and passes the values back to `main`.
We still create `query` and `file_path` in `main` but it no longer has the responsibility of determining how the command line arguments and values correspond.
This rework may seem like overkill but we are refactoring in small incremental steps.
After making this change it is good practice to verify that the arguments parsing still works
It is good to check your progress often to identify the cause of problems when they occur
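A sketch of what this first extraction could look like, with the body following the description above (error handling comes later):
```rust
use std::env;

fn main() {
    let args: Vec<String> = env::args().collect();

    // main no longer decides which index maps to which value
    let (query, file_path) = parse_config(&args);

    println!("Searching for {query}");
    println!("In file {file_path}");
}

fn parse_config(args: &[String]) -> (&str, &str) {
    let query = &args[1];
    let file_path = &args[2];

    (query, file_path)
}
```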
#### Grouping Configuration Values
We can take another small step to improve the `parse_config` function further.
At the moment we're returning a tuple, then immediately breaking that tuple into individual parts again.
@ -284,9 +284,9 @@ This is a sign that we might not have the right abstraction yet.
Another indicator that shows there is room for improvement is the `config` part of `parse_config`.
This implies that the two values we return are related and are both part of one configuration value.
We are currently not conveying this meaning in the structure of the data other than by grouping the two values into a tuple.
Instead we should put the two values into one struct and give each of the struct fields a meaningful name.
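A sketch of the struct-based version, using owned `String` fields (the `clone` calls and their trade-off are discussed just below):
```rust
struct Config {
    query: String,
    file_path: String,
}

fn parse_config(args: &[String]) -> Config {
    // clone so Config owns its data instead of borrowing from args
    let query = args[1].clone();
    let file_path = args[2].clone();

    Config { query, file_path }
}
```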
@ -327,25 +327,25 @@ The signature of `parse_config` now inidcates that it reutrns a `Config` value6
The body of `parse_config` is where we used to return string slices that reference `String` values in `args`.
The `args` variable in `main` is the owner of the argument values and is only letting the `parse_config` function borrow them, which means we'd violate Rust's borrowing rules if `Config` tried to take ownership of the values in `args`
There are a number of ways we could manage the `String` data; the easiest, though inefficient, route is to call the `clone` method on the values
This makes a full copy of the data for the `Config` instance to own, which takes more time and memory than storing a reference to the string data.
However, cloning the data also makes the code very straightforward because we don't have to manage the lifetimes of the references; in this circumstance, giving up a little performance to gain this simplicity is a worthwhile trade-off
##### The Trade-Offs of Using `clone`
There is a tendency to avoid using `clone` to fix ownership problems because of its runtime cost.
The next chapter will go over how to use more efficient methods in this type of situation.
For now it is ok to copy a few strings to continue making progress because you will make these copies only once and your file path and query string are very small.
It is better to have a working program that is a bit inefficient than to try to hyper-optimize code on the first pass.
With more experience it will be easier to start with the most efficient solution; for now it is perfectly acceptable to call `clone`.
#### Creating a Constructor for Config
@ -353,13 +353,13 @@ So far we extracted the logic responsible for parsing the command line arguments
Doing this helps us see that the `query` and `file_path` values are related and that relationship should be conveyed in our code.
We then added a `Config` struct to name the related purpose of `query` and `file_path` and to be able to return the values as named struct fields.
Now the purpose of the `parse_config` function is to create a `Config` instance so instead we should change `parse_config` from a plain function to a function named `new` that is associated with the `Config` struct.
Making this change will make the code more idiomatic
We can create instances of types in the std library such as `String` by calling `String::new`
Similarly, by changing `parse_config` into a `new` function associated with `Config`, we can create instances of `Config` by calling `Config::new`
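A sketch of the constructor version, reusing the same body as `parse_config` (struct definition repeated for completeness):
```rust
struct Config {
    query: String,
    file_path: String,
}

impl Config {
    // an associated function, called as Config::new(&args) from main
    fn new(args: &[String]) -> Config {
        let query = args[1].clone();
        let file_path = args[2].clone();

        Config { query, file_path }
    }
}
```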
@ -405,7 +405,7 @@ This will not help our end users understand and what they should do instead
### Improving the Error Message
First we will add a check in the `new` function that will verify that the slice is long enough before accessing indexes 1 and 2
If the slice is not long enough then the program panics and displays a better error message.
```rust
// --snip--
fn new(args: &[String]) -> Config {
@ -433,7 +433,7 @@ note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
```
This output is a more reasonable error message.
However we also have extraneous info that we don't want to give to our users.
Perhaps the technique of calling a panic is more appropriate for a programming problem than a usage problem as discussed in [ch9](../Error%20Handling.md).
@ -442,12 +442,11 @@ Instaed we would use a different technique, returning a `Result` that indicates
#### Returning a Result Instead of Calling `panic!`
Instead we will return a `Result` value that will contain a `Config` instance in the successful case and will describe the problem in the error case.
We are also going to change the function name from `new` to `build` because many programmers expect `new` functions to never fail
When `Config::build` communicates with `main` we can use the `Result` type to signal there was a problem.
Then we can change `main` to convert an `Err` variant into a more practical error for our users without the surrounding text about `thread 'main'` and `RUST_BACKTRACE` that a call to `panic!` causes.
Here is how we would make these changes to `build`.
Note that this will not run without changes to `main` as well
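For reference, a sketch of `build` after this change; it matches the version that ends up in *src/lib.rs* further down:
```rust
struct Config {
    query: String,
    file_path: String,
}

impl Config {
    fn build(args: &[String]) -> Result<Config, &'static str> {
        // return an Err instead of panicking when arguments are missing
        if args.len() < 3 {
            return Err("not enough arguments");
        }

        let query = args[1].clone();
        let file_path = args[2].clone();

        Ok(Config { query, file_path })
    }
}
```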
@ -471,7 +470,7 @@ Our `build` function returns a `Result` with a `Config` instance in the success
Our error values will always be string literals that have the `'static` lifetime.
There are two major changes in the body of the function
- Instead of calling `panic!` when the user doesn't pass enough arguments, we now return an `Err` value
- We wrapped the `Config` return value in an `Ok`
These changes make the function conform to its new type signature.
@ -503,21 +502,21 @@ In this we used `upwrap_or_else` which is defined on `Result<T, E>` by the std l
Using this method allows us to define some custom, non-`panic!` error handling
If the `Result` is an `Ok` value then this method's behavior is similar to `unwrap` and it returns the inner value that `Ok` is wrapping
If the value is an `Err` value, this method calls the code in the *closure*, which is an anonymous function we define and pass as an argument to `unwrap_or_else`.
Closures will be covered in the next chapter (ch13)
For now you can think of it like this: `unwrap_or_else` will pass the inner value of an `Err` to our closure in the argument `err` that appears between the vertical pipes.
The code in the closure can then use the `err` value when it runs.
We also brought in the `process` from the std library into scope.
The code in the closure that will be run in the error case is only two lines:
1. We print the `err` value
2. We call `process::exit`
The `process::exit` function will stop the program immediately and return the number that was passed as the exit status code
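A sketch of how this looks in `main`, assuming the `Config::build` function from above (the closure body is the two lines just described):
```rust
use std::env;
use std::process;

fn main() {
    let args: Vec<String> = env::args().collect();

    let config = Config::build(&args).unwrap_or_else(|err| {
        // runs only in the Err case: print the error, then exit with a non-zero status
        println!("Problem parsing arguments: {err}");
        process::exit(1);
    });

    // --snip--
}
```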
@ -539,9 +538,9 @@ Now that we finished refactoring the configuration parsing
Let's separate the program's logic
As stated in the [Separation of Concerns for Binary Projects](#separation-of-concerns-for-binary-projects), we extract a function named `run` that will hold all the logic currently in the `main` function that isn't involved with setting up configuration or handling errors.
When this is done `main` will be concise and easy to verify by inspection, and we will also write tests for all the other logic
Here is the extracted `run` function; for now it will be small and we will incrementally improve it
```rust
@ -586,18 +585,18 @@ fn run(config: Config) -> Result<(), Box<dyn Error>> {
Ok(())
}
```
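Here is a sketch of the whole `run` function after the changes described below, assuming the `Config` struct from earlier:
```rust
use std::error::Error;
use std::fs;

fn run(config: Config) -> Result<(), Box<dyn Error>> {
    // the ? operator propagates any read error to the caller
    let contents = fs::read_to_string(config.file_path)?;

    println!("With text:\n{contents}");

    Ok(())
}
```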
The three significant changes are:
- Changed the return type of the `run` function to `Result<(), Box<dyn Error>>`. The function previously returned the unit type `()` and we keep that as the value returned in the `Ok` case
For the error type we used the *trait object* `Box<dyn Error>` and we brought in `std::error::Error`
We will cover trait objects later (ch17)
For now know that `Box<dyn Error>` means the function will return a type that implements the `Error` trait, but we don't have to specify what particular type the return value will be.
This gives us flexibility to return error values that may be of different types in different error cases
The `dyn` keyword is short for *dynamic*
- We replaced the call to `expect` with the `?` operator [(can be found here)](../Error%20Handling.md#A-Shorcut-for-Popagation-Errors:-the-?-Operator)
@ -649,4 +648,380 @@ Rust tells us that our code ignored the `Result` value and the `Result` value mi
But we are not checking whether or not there was an error and the compiler reminds us that we probably meant to have some error-handling code here
#### Handling Errors Returned from `run` in main
We will check for errors and handle them using a technique similar to one used in `Config::build` before but with a slight difference
```rust
fn main() {
    // --snip--

    println!("Searching for {}", config.query);
    println!("In file {}", config.file_path);

    if let Err(e) = run(config) {
        println!("Application error: {e}");
        process::exit(1);
    }
}
```
Here we use `if let` instead of `unwrap_or_else` to check whether `run` returns an `Err` value, and we call `process::exit(1)` if it does
The `run` function doesn't return a value that we want to `unwrap` in the same way that `Config::build` returns the `Config` instance.
Because `run` returns `()` in the success case, we only care about detecting an error, so we don't need `unwrap_or_else` to return the unwrapped value, which would only be `()`
The bodies of the `if let` and the `unwrap_or_else` functions are the same in both cases: print the error and exit.
### Splitting Code into a Library Crate
Now we will look into splitting the *src/main.rs* file and putting some of the code into the `src/lib.rs` file.
This is in order to enable us to test the code and have a *src/main.rs* with less responsibility.
Here is what we will move from `main` to *src/lib.rs*:
- The `run` function definition
- The relevant `use` statements (ones that are used in the bodies of the other functions)
- The definition of `Config`
- The `Config::build` method definition
Here is what the *src/lib.rs* files should look like.
Note that this is abbreviated and that it will not compile without modifying *src/main.rs*
```rust
use std::error::Error;
use std::fs;

pub struct Config {
    pub query: String,
    pub file_path: String,
}

impl Config {
    pub fn build(args: &[String]) -> Result<Config, &'static str> {
        // --snip--
    }
}

pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
    // --snip--
}
```
Note the use of keyword `pub` on `Config`, on its fields, on the `build` method and on the `run` function.
This is a very liberal use of `pub`
Now we have a library crate with a public API that can be tested
Here are the modifications to *src/main.rs* that bring the code we moved to the library crate back into scope
```rust
use std::env;
use std::process;

use minigrep::Config;

fn main() {
    // --snip--

    if let Err(e) = minigrep::run(config) {
        // --snip--
    }
}
```
We add the `use minigrep::Config` line to bring the `Config` type from the library crate into the binary crate's scope
And we prefix the `run` function with our crate name so that it can also be used
This work sets up for success in the future.
### Sixth Goal: Developing the Library's Functionality with Test-Driven Development
Now that the logic has been extracted out of *main.rs*, leaving behind only the argument collecting and error handling, it is much easier to write tests for the core functionality of the code.
We can now call functions directly with various arguments and check the return values without having to call our binary from the command line.
This goal's section will focus on adding the search logic to the `minigrep` program using the test-driven development (TDD) process with the steps:
1. Write a test that fails and run it to make sure it fails for the reason you expect
2. Write or modify just enough code to make the new test pass
3. Refactor the code you just added or changed and make sure the tests continue to pass
4. Repeat from step 1
Even though this is one of many ways to write software, TDD can help drive code design
Writing the tests before you write code that makes the test pass helps to maintain high test coverage throughout the process.
We will test drive the implementation of the functionality that will actually do the searching for the query string in the file contents and produce a list of lines that match the query
We will add this in the function called `search`
#### Writing a Failing Test
First lets remove the `println!` statements because we don't need them anymore to check the program's behavior.
Next we'll add a `tests` module with a test function the same as [The Test Anatomy](../Writing_Tests.md) from before.
This test will specify the behavior we want the `search` function to have
- It will take a query and the text to search
- It will return only the lines from the text that contain the query
Here is the test (it goes in *src/lib.rs*)
Note it will not compile yet
```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn one_result() {
        let query = "duct";
        let contents = "\
Rust:
safe, fast, productive.
Pick three.";

        assert_eq!(vec!["safe, fast, productive."], search(query, contents));
    }
}
```
This test will search for the string `"duct"`
The text we will search is three lines, only one of which contains `"duct"`
Note that the backslash after the opening double quote tells Rust not to put a newline character at the beginning of the contents of this string literal.
We will then assert that the value returned from the `search` function only contains the line we expect
We aren't yet able to run this test and watch it fail because the function it needs in order to compile and run doesn't exist yet.
In accordance with TDD principles we will add just enough code to compile and run by adding a definition of the `search` function that always returns an empty vector that doesn't match with the one in the assert.
Here is what the function will look like at this point
```rust
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    vec![]
}
```
Notice that we need to define an explicit lifetime `'a` in the signature of `search` and use that lifetime with the `contents` argument and the return value.
This case specifies that the vector returned should contain string slices that reference slices of the argument `contents` (rather than the argument `query`).
It also could be said that the returned value will live as long as what was passed into the `contents` arguments.
This is important because the data referenced *by* a slice needs to be valid for the reference to be valid.
If the compiler assumes we are making string slices of `query` rather than `contents` it will do its safety checking incorrectly.
If we forget lifetime annotations and try to compile we will get this error
```
$ cargo build
Compiling minigrep v0.1.0 (file:///projects/minigrep)
error[E0106]: missing lifetime specifier
--> src/lib.rs:28:51
|
28 | pub fn search(query: &str, contents: &str) -> Vec<&str> {
| ---- ---- ^ expected named lifetime parameter
|
= help: this function's return type contains a borrowed value, but the signature does not say whether it is borrowed from `query` or `contents`
help: consider introducing a named lifetime parameter
|
28 | pub fn search<'a>(query: &'a str, contents: &'a str) -> Vec<&'a str> {
| ++++ ++ ++ ++
For more information about this error, try `rustc --explain E0106`.
error: could not compile `minigrep` (lib) due to 1 previous error
```
Rust can't possibly know which of the two args we need so we need to tell it explicitly.
Because `contents` is the argument that contains all of our text, we want to return the parts of that text that match.
This shows that `contents` is the argument that should be connected to the return value using the lifetime syntax.
Other programming languages don't require you to connect the arguments to the return value, but this practice will get easier over time with more exposure.
Here is the output of the test
```
$ cargo test
Compiling minigrep v0.1.0 (file:///projects/minigrep)
Finished `test` profile [unoptimized + debuginfo] target(s) in 0.97s
Running unittests src/lib.rs (target/debug/deps/minigrep-9cd200e5fac0fc94)
running 1 test
test tests::one_result ... FAILED
failures:
---- tests::one_result stdout ----
thread 'tests::one_result' panicked at src/lib.rs:44:9:
assertion `left == right` failed
left: ["safe, fast, productive."]
right: []
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
failures:
tests::one_result
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
error: test failed, to rerun pass `--lib`
```
This test fails exactly as expected
#### Writing Code to Pass the Test
Our test is failing because we always return an empty vector
To fix this and implement `search`, the program needs to follow these steps:
1. Iterate through each line of the contents
2. Check whether the line contains the query string
3. If it does add it to the list of values we are returning
4. If it doesn't do nothing
5. Return the list of results that match
##### Iterating Through Lines with the `lines` Method
Rust includes a helpful method to handle line-by-line iteration of strings, named `lines`
Here is how it would be used in this case (note it will not compile yet)
```rust
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    for line in contents.lines() {
        // do something with line
    }
}
```
The `lines` method returns an iterator.
For now, recall that using an iterator in a `for` loop runs some code on each item in a collection
##### Searching each Line for the Query
Next we will check whether the current line contains our query string.
Strings have a helpful method named `contains` that does this for us.
Now let's add a call to the `contains` method in the `search` function
Here is the updated function
Note it still will not compile
```rust
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    for line in contents.lines() {
        if line.contains(query) {
            // do something with line
        }
    }
}
```
At the moment we are only building up functionality
To get the code to compile we need to return a value from the body as we indicated in the function signature
##### Storing Matching Lines
To finish this function we need a way to store the matching lines that we want to return.
To do this for now we can make a mutable vector before the `for` loop and call the `push` method to store a `line` in the vector
After the `for` loop the vector will be returned
Here is what the function looks like after adding the vector and the `push` method
Note it will now compile
```rust
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let mut results = Vec::new();

    for line in contents.lines() {
        if line.contains(query) {
            results.push(line);
        }
    }

    results
}
```
Now the `search` function should return only the lines that contain `query` and the test should pass
Here is the output when running the test at this point
```
$ cargo test
Compiling minigrep v0.1.0 (file:///projects/minigrep)
Finished `test` profile [unoptimized + debuginfo] target(s) in 1.22s
Running unittests src/lib.rs (target/debug/deps/minigrep-9cd200e5fac0fc94)
running 1 test
test tests::one_result ... ok
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Running unittests src/main.rs (target/debug/deps/minigrep-9cd200e5fac0fc94)
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
Doc-tests minigrep
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
```
As we can now see the test passes.
At this point we could consider opportunities for refactoring the implementation of the search function while keeping the tests passing to maintain the same functionality
The code in the search function isn't too bad but it doesn't take advantage of some useful features that iterators have
This will be further improved in the iterators chapter
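For example, a sketch of an iterator-based version (the kind of rewrite the iterators chapter covers) could look like this:
```rust
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    // filter keeps only the lines containing the query; collect builds the Vec
    contents
        .lines()
        .filter(|line| line.contains(query))
        .collect()
}
```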
##### Using the Search Function in the `run` Function
Now that the `search` function is working and tested, we now need to call `search` from our `run` function.
We need to pass the `config.query` value and the `contents` that `run` reads from the file to the `search` function.
Then `run` will print each line returned from `search`
Here is what `run` will look like now
```rust
pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
    let contents = fs::read_to_string(config.file_path)?;

    for line in search(&config.query, &contents) {
        println!("{line}");
    }

    Ok(())
}
```
We are still using a `for` loop to return each line from `search` and print it
Now the entire program should work
Let's try it first with a word that should return exactly one line from the Emily Dickinson poem: *frog*
Here is the output
```
$ cargo run -- frog poem.txt
Compiling minigrep v0.1.0 (file:///projects/minigrep)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.38s
Running `target/debug/minigrep frog poem.txt`
How public, like a frog
```
Now let's try a word that will match multiple lines, like *body*
```
$ cargo run -- body poem.txt
Compiling minigrep v0.1.0 (file:///projects/minigrep)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.0s
Running `target/debug/minigrep body poem.txt`
I'm nobody! Who are you?
Are you nobody, too?
How dreary to be somebody!
```
Then let's make sure we don't get any lines when we search for a word that isn't anywhere in the poem, such as *monomorphization*
```
$ cargo run -- monomorphization poem.txt
Compiling minigrep v0.1.0 (file:///projects/minigrep)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.0s
Running `target/debug/minigrep monomorphization poem.txt`
```
Now that it is finished, we will finish off with a demonstration of how to work with environment variables and how to print to standard error, both of which are useful when you are writing command line programs
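As a quick preview, the standard-library pieces involved are roughly these (the `IGNORE_CASE` variable name is only an illustration):
```rust
use std::env;

fn main() {
    // env::var returns Err if the variable is unset, so is_ok() works as a flag
    let ignore_case = env::var("IGNORE_CASE").is_ok();
    println!("ignore_case = {ignore_case}");

    // eprintln! writes to standard error instead of standard output
    eprintln!("this message goes to stderr, not stdout");
}
```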

61
minigrep/src/lib.rs Normal file
View File

@ -0,0 +1,61 @@
use std::fs;
use std::error::Error;

// refactor 9
pub struct Config {
    pub query: String,
    pub file_path: String,
}

impl Config {
    pub fn build(args: &[String]) -> Result<Config, &'static str> {
        if args.len() < 3 {
            return Err("not enough arguments");
        }

        let query = args[1].clone();
        let file_path = args[2].clone();

        Ok(Config { query, file_path })
    }
}

pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
    let contents = fs::read_to_string(config.file_path)?;

    // refactor 10
    // println!("With text:\n{contents}")

    for line in search(&config.query, &contents) {
        println!("{line}");
    }

    Ok(())
}

pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    // original stub that could only fail:
    // vec![]
    let mut results = Vec::new();

    for line in contents.lines() {
        if line.contains(query) {
            results.push(line);
        }
    }

    results
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn one_result() {
        let query = "duct";
        let contents = "\
Rust:
safe, fast, productive.
Pick three.";

        assert_eq!(vec!["safe, fast, productive."], search(query, contents));
    }
}

View File

@ -1,7 +1,9 @@
use std::env;
use std::fs;
use std::process;
use std::error::Error;
// refactor 9
// use std::fs;
// use std::error::Error;
use minigrep::Config;
fn main() {
let args: Vec<String> = env::args().collect();
@ -21,37 +23,50 @@ fn main() {
// let config = Config::new(&args);
// recfactor 6
let config = Config::build(&args).unwrap_or_else(|err| {
println!("Problem parsing arguments: {err}");
let config = Config::build(&args);
// refactor 8
//.unwrap_or_else(|err| {
// println!("Problem parsing arguments: {err}");
// process::exit(1);
// });
// refactor 10
// println!("Searching for {}", config.query);
// println!("In the file {}", config.file_path);
// refactor 8
if let Err(e) = minigrep::run(config) {
// needed for helping the user
println!("Application error: {e}");
process::exit(1);
});
println!("Searching for {}", config.query);
println!("In the file {}", config.file_path);
}
// refactor 7
// // --snip--
// let contents = fs::read_to_string(config.file_path).expect("Should have been able to read the file");
// println!("With text:\n{contents}");
run(config);
// refactor 8
// run(config);
}
// refactor 7
fn run(config: Config) -> Result<(), Box<dyn Error>> {
let contents = fs::read_to_string(config.file_path)?;
// refactor 9
// // refactor 7
// fn run(config: Config) -> Result<(), Box<dyn Error>> {
// let contents = fs::read_to_string(config.file_path)?;
println!("With text:\n{contents}")
// println!("With text:\n{contents}")
Ok(())
}
// Ok(())
// }
// refactor 3
struct Config {
query: String,
file_path: String,
}
// refactor 9
// // refactor 3
// struct Config {
// query: String,
// file_path: String,
// }
// refactor 1
// fn parse_config(args: &[String]) -> (&str, &str) {
@ -69,29 +84,30 @@ struct Config {
// Config { query, file_path }
// }
// refactor 9
// refactor 3
impl Config {
// // refactor 3
// fn new(args: &[String]) -> Config {
// // refactor 4
// if args.len() < 3 {
// panic!("not enough arguments");
// }
// let query = args[1].clone();
// let file_path = args[2].clone();
// impl Config {
// // // refactor 3
// // fn new(args: &[String]) -> Config {
// // // refactor 4
// // if args.len() < 3 {
// // panic!("not enough arguments");
// // }
// // let query = args[1].clone();
// // let file_path = args[2].clone();
// Config { query, file_path }
// }
// // Config { query, file_path }
// // }
// refactor 5
fn build(args: &[String]) -> Result<Config, &'static str> {
if args.len() < 3 {
return Err("not enough arguments");
}
// // refactor 5
// fn build(args: &[String]) -> Result<Config, &'static str> {
// if args.len() < 3 {
// return Err("not enough arguments");
// }
let query = args[1].clone();
let file_path = args[2].clone();
// let query = args[1].clone();
// let file_path = args[2].clone();
Ok(Config { query, file_path })
}
}
// Ok(Config { query, file_path })
// }
// }