There was a programming-"contest" on comp.lang.c and I wanted to show^^^^^^^^^^^^^^^^^^^^^ C++
the simpler code in C, here it is:
#include <iostream>
#include <sstream>
#include <iomanip>
#include <optional>
#include <algorithm>
using namespace std;
static optional<size_t> parse( const char *str );
int main( int argc, char **argv )
{
if( argc < 3 )
return EXIT_FAILURE;
optional<size_t>
pRows = parse( argv[1] ),
pCols = parse( argv[2] );
if( !pRows || !pCols )
return EXIT_FAILURE;
size_t rows = *pRows, cols = *pCols;
optional<size_t> pClip( rows * cols );
if( argc >= 4 && !(pClip = parse( argv[3] )) )
return EXIT_FAILURE;
size_t clip = min( *pClip, rows * cols );
streamsize width = (ostringstream() << clip).str().length();
for( size_t row = 1; row <= min( rows, clip ); ++row )
{
bool head = true;
for( size_t value = row; value <= clip; value += rows, head =
false )
cout << " "sv.substr( head, !head ) << right << setw( width ) << value;
cout << endl;
}
}
static optional<size_t> parse( const char *str )
{
istringstream iss( str );
size_t ret;
iss >> ret;
if( !iss || !iss.eof() )
return nullopt;
return ret;
}
C++ really rocks since you've to deal with much less details than in C.
There was a programming-"contest" on comp.lang.c and I wanted to show
the simpler code in C[++], here it is:
[...]
C++ really rocks since you've to deal with much less details than in C.
Concerning your subject; it wouldn't appear to me to use the term
"beauty" in context of C++. C++ inherited so much ugly syntax and
concepts (from "C") and it added yet more syntactic infelicities.
(And I'm saying that as someone who liked to program in C++, also professionally, for many many years.)
There was a programming-"contest" on comp.lang.c and I wanted to show
the simpler code in C, here it is:
#include <iostream>
#include <sstream>
#include <iomanip>
#include <optional>
#include <algorithm>
using namespace std;
static optional<size_t> parse( const char *str );
int main( int argc, char **argv )
{
if( argc < 3 )
return EXIT_FAILURE;
optional<size_t>
pRows = parse( argv[1] ),
pCols = parse( argv[2] );
if( !pRows || !pCols )
return EXIT_FAILURE;
size_t rows = *pRows, cols = *pCols;
optional<size_t> pClip( rows * cols );
if( argc >= 4 && !(pClip = parse( argv[3] )) )
return EXIT_FAILURE;
size_t clip = min( *pClip, rows * cols );
streamsize width = (ostringstream() << clip).str().length();
for( size_t row = 1; row <= min( rows, clip ); ++row )
{
bool head = true;
for( size_t value = row; value <= clip; value += rows, head =
false )
cout << " "sv.substr( head, !head ) << right << setw( width
) << value;
cout << endl;
}
}
static optional<size_t> parse( const char *str )
{
istringstream iss( str );
size_t ret;
iss >> ret;
if( !iss || !iss.eof() )
return nullopt;
return ret;
}
C++ really rocks since you've to deal with much less details than in C.
On 3/12/2026 2:24 AM, Bonita Montero wrote:
There was a programming-"contest" on comp.lang.c and I wanted to show
the simpler code in C, here it is:
#include <iostream>
#include <sstream>
#include <iomanip>
#include <optional>
#include <algorithm>
using namespace std;
static optional<size_t> parse( const char *str );
int main( int argc, char **argv )
{
if( argc < 3 )
return EXIT_FAILURE;
optional<size_t>
pRows = parse( argv[1] ),
pCols = parse( argv[2] );
if( !pRows || !pCols )
return EXIT_FAILURE;
size_t rows = *pRows, cols = *pCols;
optional<size_t> pClip( rows * cols );
if( argc >= 4 && !(pClip = parse( argv[3] )) )
return EXIT_FAILURE;
size_t clip = min( *pClip, rows * cols );
streamsize width = (ostringstream() << clip).str().length();
for( size_t row = 1; row <= min( rows, clip ); ++row )
{
bool head = true;
for( size_t value = row; value <= clip; value += rows, head =
false )
cout << " "sv.substr( head, !head ) << right <<
setw( width ) << value;
cout << endl;
}
}
static optional<size_t> parse( const char *str )
{
istringstream iss( str );
size_t ret;
iss >> ret;
if( !iss || !iss.eof() )
return nullopt;
return ret;
}
C++ really rocks since you've to deal with much less details than in C.
Is your strategy to just ignore reality, and keep making bogus claims
that - for this challenge at least - you can't support?
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char *argv[]) {
if (argc < 3 || argc > 4) {
printf("Enter 2 or 3 arguments:\n$./prog rows columns [stop]\n");
return 0;
}
int rows = atoi(argv[1]);
int cols = atoi(argv[2]);
int max = (argc == 4) ? (atoi(argv[3])) : (rows * cols);
char cw[12];
int colwidth = sprintf(cw,"%d ",rows * cols);
for (int r = 1; r <= rows; r++) {
if (r <= max) {
int nbr = r;
printf("%*d", colwidth, nbr);
for (int i = 0; i < (cols - 1); i++) {
((nbr += rows) <= max) ? (printf("%*d", colwidth,
nbr)) : (0) ;
}
printf("\n");
}
}
return 0;
}
But real C++ code is usually multiple times shorter, mostly because of generic programming. C really sucks since you have to flip every bit
on your own.
On 3/12/26 15:03, Bonita Montero wrote:
But real C++ code is usually multiple times shorter, mostly because of
generic programming. C really sucks since you have to flip every bit
on your own.
C++ really sucks because you can't know who flip the bits.
Here, do that in C:
static optional<size_t> parse( const char *str )
{
size_t ret;
if( from_chars_result fcr = from_chars( str, str + strlen( str ), ret ); (bool)fcr.ec || *fcr.ptr )
return nullopt;
return ret;
}
On 3/12/2026 10:13 AM, Bonita Montero wrote:
Here, do that in C:
static optional<size_t> parse( const char *str )
{
size_t ret;
if( from_chars_result fcr = from_chars( str, str + strlen( str ), >> ret ); (bool)fcr.ec || *fcr.ptr )
return nullopt;
return ret;
}
Explain in detail what it does.
Am 12.03.2026 um 15:43 schrieb DFS:
On 3/12/2026 10:13 AM, Bonita Montero wrote:
Here, do that in C:
static optional<size_t> parse( const char *str )
{
size_t ret;
if( from_chars_result fcr = from_chars( str, str + strlen( str >>> ), ret ); (bool)fcr.ec || *fcr.ptr )
return nullopt;
return ret;
}
Explain in detail what it does.
If that isn't self-explanatory stick with C.
I've got a task for you: Do the same in C:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <vector>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( "^\\s*\"([^\"]*)\"\\s*\"([^\"]*)\"\\s*$" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
while( !ifs.eof() )
{
string line;
getline( ifs, line );
match_results<string::const_iterator> sm;
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( string( sm[1].first, sm[1].second
), string( sm[2].first, sm[2].second ) );
}
sort( phoneList.begin(), phoneList.end(),
[]( const name_tel &left, const name_tel &right ) { return left.name < right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\"" <<
endl;
}
1. Read a file and parse it with ne mentioned regex-pattern.
2. Split both parts of every line in two strings.
3. Sort the "vector" according to the first string.
4. Print it.
I guess you don't manage to do that with less than five times the work.
Every external lib allowed.
Give me the file you used.
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
The input file looks like this:
"White House" "001 202 456 1414"
"Mother" "0049 211 151395"
"Mickey Mouse" "001 123 456 7890"
The output shout look sorted:
"White House" "001 202 456 1414"
"Mickey Mouse" "001 123 456 7890"
"Mother" "0049 211 151395"
static regex rxNameTel( "^\\s*\"([^\"]*)\"\\s*\"([^\"]*)\"\\s*$" );
Give me the file you used.
Here, do that in C:
static optional<size_t> parse( const char *str )
{
size_t ret;
if( from_chars_result fcr = from_chars( str, str + strlen( str ), ret
); (bool)fcr.ec || *fcr.ptr )
return nullopt;
return ret;
}
Am 12.03.2026 um 15:43 schrieb DFS:
On 3/12/2026 10:13 AM, Bonita Montero wrote:
Here, do that in C:
static optional<size_t> parse( const char *str )
{
size_t ret;
if( from_chars_result fcr = from_chars( str, str + strlen( str ),
ret ); (bool)fcr.ec || *fcr.ptr )
return nullopt;
return ret;
}
Explain in detail what it does.
If that isn't self-explanatory stick with C.
I've got a task for you: Do the same in C:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <vector>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( "^\\s*\"([^\"]*)\"\\s*\"([^\"]*)\"\\s*$" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
while( !ifs.eof() )
{
string line;
getline( ifs, line );
match_results<string::const_iterator> sm;
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( string( sm[1].first, sm[1].second ), string(
sm[2].first, sm[2].second ) );
}
sort( phoneList.begin(), phoneList.end(),
[]( const name_tel &left, const name_tel &right ) { return left.name <
right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\"" << endl;
}
1. Read a file and parse it with ne mentioned regex-pattern.
2. Split both parts of every line in two strings.
3. Sort the "vector" according to the first string.
4. Print it.
Am 12.03.2026 um 09:32 schrieb Janis Papanagnou:
Concerning your subject; it wouldn't appear to me to use the term
"beauty" in context of C++. C++ inherited so much ugly syntax and
concepts (from "C") and it added yet more syntactic infelicities.
(And I'm saying that as someone who liked to program in C++, also
professionally, for many many years.)
My perception of beauty is just as subjective as your perception of the
ugly aspects of C++. I like the span of lowleven means in C++ that are inherited from C to higher level abstractions C++ has inherited from
other more moderrn language.
And if you don't the subjektive part of beauty or ugly parts of this
language C++ is several times more effective than C, i.e. you need
only a fraction of the code size while mostly maintaining the same performance.
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
Better take this:
[snip list of names of people and telephone numbers]
The output should be this:
[snip list of names of people and telephone numbers]
I've got a task for you: Do the same in C:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <vector>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( "^\\s*\"([^\"]*)\"\\s*\"([^\"]*)\"\\s*$" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
while( !ifs.eof() )
{
string line;
getline( ifs, line );
match_results<string::const_iterator> sm;
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( string( sm[1].first, sm[1].second
), string( sm[2].first, sm[2].second ) );
}
sort( phoneList.begin(), phoneList.end(),
[]( const name_tel &left, const name_tel &right ) { return left.name < right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\"" <<
endl;
}
1. Read a file and parse it with ne mentioned regex-pattern.
2. Split both parts of every line in two strings.
3. Sort the "vector" according to the first string.
4. Print it.
I guess you don't manage to do that with less than five times the work.
Every external lib allowed.
You didn't bother to check if the file was opened.
This is certainly a task that can be done simply using a shell
script and the standard POSIX utility set. awk(1) would be
a good starting point; it's possible that it can be done with
a single invocation of the posix 'sort' utility.
Of course, the lack of any in-line documentation (e.g. comments) is
a typical defect in your C++ code.
On 12.03.26 16:48, Bonita Montero wrote:
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
Better take this:
[snip list of names of people and telephone numbers]
The output should be this:
[snip list of names of people and telephone numbers]
I see there are real existing people in that list.
Do you have the consent of these persons to disseminate
their names and telephone numbers?
You didn't bother to check if the file was opened.
I'll try that in C if you commit to trying the following in C++. Deal?
--------------------------------------------------------------------------
* read in a list of words from a file here: >https://people.sc.fsu.edu/~jburkardt/datasets/words/words.html
* pick N random words from that list and put them in an array (OK if
there are a few dupes - if you can get no dupes that's better)
* sort and print the array of randoms, adding a blank line each time the
1st letter changes
(note: I just now wrote this. It's not old code.)
usage is
$./randwords filename N
$./randwords special_english.txt 200
--------------------------------------------------------------------------
Am 12.03.2026 um 20:25 schrieb Janis Papanagnou:
On 12.03.26 16:48, Bonita Montero wrote:
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
Better take this:
[snip list of names of people and telephone numbers]
The output should be this:
[snip list of names of people and telephone numbers]
I see there are real existing people in that list.
Do you have the consent of these persons to disseminate
their names and telephone numbers?
These are no real people.
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
Better take this:
DFS <nospam@dfs.com> writes:
I'll try that in C if you commit to trying the following in C++. Deal?
-------------------------------------------------------------------------- >> * read in a list of words from a file here:
https://people.sc.fsu.edu/~jburkardt/datasets/words/words.html
* pick N random words from that list and put them in an array (OK if
there are a few dupes - if you can get no dupes that's better)
* sort and print the array of randoms, adding a blank line each time the
1st letter changes
(note: I just now wrote this. It's not old code.)
usage is
$./randwords filename N
$./randwords special_english.txt 200
--------------------------------------------------------------------------
A POSIX version:
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
int
qsortcompare(const void *a, const void *b)
{
const char* ca = *(const char**)a;
const char* cb = *(const char**)b;
return strcmp(ca,cb);
}
int
main(int argc, const char **argv)
{
int fd;
struct stat st;
char *words;
char *cp;
char *end;
char **wordlist;
char **sorted;
char ch = '\0';
size_t wordcount = 0u;
size_t i;
size_t n;
if (argc < 3) {
fprintf(stderr, "Usage: %s <wordlist> <N>\n", argv[0]);
return 1;
}
n = strtoul(argv[2], &cp, 0);
if ((cp == argv[2]) || (*cp != '\0')) {
fprintf(stderr, "%s: <N> argument '%s' must be fully numeric in base 8, 10 or 16\n", argv[0], argv[2]);
return 1;
}
if (stat(argv[1], &st) == -1) {
fprintf(stderr, "%s: Unable to open '%s': %s\n", argv[0], argv[1], strerror(errno));
return 2;
}
fd = open(argv[1], O_RDONLY, 0);
if (fd == -1) {
fprintf(stderr, "%s: Unable to open '%s': %s\n", argv[0], argv[1], strerror(errno));
return 2;
}
words = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0ul);
if (words == MAP_FAILED) {
fprintf(stderr, "%s: Unable to mmap '%s': %s\n", argv[0], argv[1], strerror(errno));
return 2;
}
close(fd);
cp = words;
for(i = 0ul; i < st.st_size; i++, cp++) {
if (*cp == '\n') wordcount++;
}
wordlist = malloc(wordcount * sizeof(const char *));
if (wordlist == NULL) {
fprintf(stderr, "%s: Unable to allocate %zu bytes\n", argv[0], wordcount * sizeof(const char *));
return 3;
}
cp = words;
end = cp + st.st_size;
i = 0u;
while (cp < end) {
wordlist[i++] = cp;
for(; (cp < end) && (*cp != '\n'); cp++) {}
*cp++ = '\0';
}
sorted = malloc(n * sizeof(const char *));
if (sorted == NULL) {
fprintf(stderr, "%s: Unable to allocate %zu bytes\n", argv[0], n * sizeof(const char *));
return 3;
}
srand(time(NULL));
for(i = 0ul; i < n; i++) {
sorted[i] = wordlist[rand() % wordcount];
}
qsort(sorted, n, sizeof(sorted[0]), qsortcompare);
for(size_t w = 0ul; w < n; w++) {
if (ch && (ch != sorted[w][0])) fputc('\n', stdout);
fprintf(stdout, "%s\n", sorted[w]);
ch = sorted[w][0];
}
free(wordlist);
munmap(words, st.st_size);
return 0;
}
This is an interesting statement. - I first saw the typical
"Max Mustermann" that is in our country the prototype of an
artificial test entry with obvious test number 0123-4567890.
Then I picked an arbitrary entry and looked it up; I found
that person with exactly the associated telephone number. -
Where did you get these real names from?
That's 50 in and 50 out.
So what's the RegEx for?
//Floating point exception (core dumped)...
for(i = 0ul; i < n; i++) {
sorted[i] = wordlist[rand() % wordcount];
}
free(wordlist);
munmap(words, st.st_size);
On 3/12/2026 8:54 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
======================================================================== #include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
int
qsortcompare(const void *a, const void *b)
{
const char* ca = *(const char**)a;
const char* cb = *(const char**)b;
return strcmp(ca,cb);
}
int
main(int argc, const char **argv)
{
int fd;
struct stat st;
char *words;
char *cp;
char *end;
char **wordlist;
char **sorted;
char ch = '\0';
size_t wordcount = 0u;
size_t i;
size_t n;
n = strtoul(argv[2], &cp, 0);
fd = open(argv[1], O_RDONLY, 0);
words = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0ul);
close(fd);
cp = words;
for(i = 0ul; i < st.st_size; i++, cp++) {
if (*cp == '\n') wordcount++;
}
wordlist = malloc(wordcount * sizeof(const char *));
cp = words;
end = cp + st.st_size;
i = 0u;
while (cp < end) {
wordlist[i++] = cp;
for(; (cp < end) && (*cp != '\n'); cp++) {}
*cp++ = '\0';
}
sorted = malloc(n * sizeof(const char *));
srand(time(NULL));
//Floating point exception (core dumped)
for(i = 0ul; i < n; i++) {
sorted[i] = wordlist[rand() % wordcount];
}
qsort(sorted, n, sizeof(sorted[0]), qsortcompare);
for(size_t w = 0ul; w < n; w++) {
if (ch && (ch != sorted[w][0])) fputc('\n', stdout);
fprintf(stdout, "%s\n", sorted[w]);
ch = sorted[w][0];
}
free(wordlist);
munmap(words, st.st_size);
return 0;
}
========================================================================
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
int
qsortcompare(const void *a, const void *b)
{
const char* ca = *(const char**)a;
const char* cb = *(const char**)b;
return strcmp(ca,cb);
}
int
main(int argc, const char **argv)
{
int fd;
struct stat st;
char *words;
char *cp;
char *end;
char **wordlist;
char **sorted;
char ch = '\0';
size_t wordcount = 0u;
size_t i;
size_t n;
if (argc < 3) {
fprintf(stderr, "Usage: %s <wordlist> <N>\n", argv[0]);
return 1;
}
n = strtoul(argv[2], &cp, 0);
if ((cp == argv[2]) || (*cp != '\0')) {
fprintf(stderr, "%s: <N> argument '%s' must be fully numeric
in base 8, 10 or 16\n", argv[0], argv[2]);
return 1;
}
if (stat(argv[1], &st) == -1) {
fprintf(stderr, "%s: Unable to open '%s': %s\n", argv[0], >> argv[1], strerror(errno));
return 2;
}
fd = open(argv[1], O_RDONLY, 0);
if (fd == -1) {
fprintf(stderr, "%s: Unable to open '%s': %s\n", argv[0], >> argv[1], strerror(errno));
return 2;
}
words = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, >> fd, 0ul);
if (words == MAP_FAILED) {
fprintf(stderr, "%s: Unable to mmap '%s': %s\n", argv[0], >> argv[1], strerror(errno));
return 2;
}
close(fd);
cp = words;
for(i = 0ul; i < st.st_size; i++, cp++) {
if (*cp == '\n') wordcount++;
}
wordlist = malloc(wordcount * sizeof(const char *));
if (wordlist == NULL) {
fprintf(stderr, "%s: Unable to allocate %zu bytes\n",
argv[0], wordcount * sizeof(const char *));
return 3;
}
cp = words;
end = cp + st.st_size;
i = 0u;
while (cp < end) {
wordlist[i++] = cp;
for(; (cp < end) && (*cp != '\n'); cp++) {}
*cp++ = '\0';
}
sorted = malloc(n * sizeof(const char *));
if (sorted == NULL) {
fprintf(stderr, "%s: Unable to allocate %zu bytes\n",
argv[0], n * sizeof(const char *));
return 3;
}
srand(time(NULL));
for(i = 0ul; i < n; i++) {
sorted[i] = wordlist[rand() % wordcount];
}
qsort(sorted, n, sizeof(sorted[0]), qsortcompare);
for(size_t w = 0ul; w < n; w++) {
if (ch && (ch != sorted[w][0])) fputc('\n', stdout);
fprintf(stdout, "%s\n", sorted[w]);
ch = sorted[w][0];
}
free(wordlist);
munmap(words, st.st_size);
return 0;
}
Am 13.03.2026 um 05:31 schrieb DFS:
//Floating point exception (core dumped)...
for(i = 0ul; i < n; i++) {
sorted[i] = wordlist[rand() % wordcount];
}
free(wordlist);
munmap(words, st.st_size);
BIG mistake: If you crash in the first part the memory in
the second part isn't freed, at least under Windows 3.11. ;-)
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
output is something like:
$ ./randwords special_english.txt 200
1477 words read in
200 random words extracted
1. above
2. accept
3. after
4. against
5. agency
6. ammunition
7. anger
8. anniversary
9. army
10. arrive
11. art
12. artillery
13. automobile
14. autumn
15. bed
16. below
17. bleed
18. blow
19. blue
20. boat
21. boil
22. bread
23. bridge
24. brown
25. business
26. cancer
27. claim
28. clean
29. cloud
30. cloud
31. cloud
32. combine
33. compare
34. conflict
35. consider
36. contain
37. correct
38. credit
39. criticize
40. cross
41. crowd
42. customs
43. decide
44. demand
...
Am 12.03.2026 um 21:18 schrieb DFS:
output is something like:
$ ./randwords special_english.txt 200
1477 words read in
200 random words extracted
1. above
2. accept
3. after
...
What's the format of the input file ? Also numbered lines ?
Am 13.03.2026 um 02:19 schrieb Janis Papanagnou:
This is an interesting statement. - I first saw the typical
"Max Mustermann" that is in our country the prototype of an
artificial test entry with obvious test number 0123-4567890.
Then I picked an arbitrary entry and looked it up; I found
that person with exactly the associated telephone number. -
Where did you get these real names from?
The list was generated with ChatGpt.
On 3/12/2026 10:14 PM, Bonita Montero wrote:
Am 13.03.2026 um 02:19 schrieb Janis Papanagnou:
This is an interesting statement. - I first saw the typical
"Max Mustermann" that is in our country the prototype of an
artificial test entry with obvious test number 0123-4567890.
Then I picked an arbitrary entry and looked it up; I found
that person with exactly the associated telephone number. -
Where did you get these real names from?
The list was generated with ChatGpt.
Oh my. Beware, and always double, and triple, check its reams of code.
Am 13.03.2026 um 05:31 schrieb DFS:
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
Take my signal_scope<> class, it makes signals thread-specific:
thread_local jmp_buf jb;
signal_scope<SIGFPE> fpeExc( +[]( int, siginfo_t *, void * )
{
static const char PrintThis[] = "caught!\n";
(void)write( 1, PrintThis, sizeof PrintThis - 1 );
siglongjmp( jb, 1 );
} );
if( int ret = sigsetjmp( jb, 1 ); !ret )
...
else
...
Here's the source of the signal_scope class:
#pragma once
#include <cstdlib>
#include <variant>
#include <mutex>
#include <cassert>
#include <unistd.h>
#include <signal.h>
#include <setjmp.h>
template<int SigNo>
struct signal_scope
{
static_assert(SigNo == SIGILL || SigNo == SIGFPE || SigNo == SIGSEGV || SigNo == SIGBUS || SigNo == SIGTRAP, "only sychronous signals");
using handler_fn = bool (*)( int );
using siginfo_handler_fn = bool (*)( int, siginfo_t *, void * );
using handler_variant = std::variant<int, handler_fn, siginfo_handler_fn>;
signal_scope( handler_variant handler = handler_variant() ) noexcept;
~signal_scope();
void operator =( handler_variant handler ) noexcept;
static void fallback( handler_variant handler ) noexcept;
static void re_init( const sigset_t *pSet, int flags );
private:
inline static std::mutex g_mtxFallback;
inline static handler_variant g_fallback = handler_variant();
inline static thread_local handler_variant t_handler = handler_variant();
handler_variant m_handlerBefore;
inline static struct init
{
init();
~init();
void reset( const sigset_t *pSet, int flags, bool old );
void dummy() {}
struct sigaction m_saBefore;
} g_init;
static void action( int sig, siginfo_t *info, void *uContext ) noexcept;
static bool callHandler( const handler_variant &handler, int sig, siginfo_t *info, void *uContext );
};
template<int SigNo>
signal_scope<SigNo>::signal_scope( handler_variant handler ) noexcept :
m_handlerBefore( t_handler )
{
(void)g_init;
t_handler = handler;
}
template<int SigNo>
inline signal_scope<SigNo>::~signal_scope()
{
t_handler = m_handlerBefore;
}
template<int SigNo>
inline void signal_scope<SigNo>::operator =( handler_variant handler ) noexcept
{
t_handler = handler;
}
template<int SigNo>
void signal_scope<SigNo>::fallback( handler_variant handler ) noexcept
{
using namespace std;
lock_guard lock( g_mtxFallback );
g_fallback = handler;
}
template<int SigNo>
void signal_scope<SigNo>::re_init( const sigset_t *pSet, int flags )
{
g_init.reset( pSet, flags, false );
}
template<int SigNo>
signal_scope<SigNo>::init::init()
{
reset( nullptr, 0, true );
}
template<int SigNo>
signal_scope<SigNo>::init::~init()
{
sigaction( SigNo, &m_saBefore, nullptr );
}
template<int SigNo>
void signal_scope<SigNo>::init::reset( const sigset_t *pSet, int flags,
bool old )
{
struct sigaction sa;
sa.sa_sigaction = action;
if( pSet )
sa.sa_mask = *pSet;
else
sigfillset( &sa.sa_mask );
sa.sa_flags = flags | SA_SIGINFO;
sigaction( SigNo, &sa, old ? &m_saBefore : nullptr );
}
template<int SigNo>
void signal_scope<SigNo>::action( int sig, siginfo_t *info, void
*uContext ) noexcept
{
using namespace std;
if( callHandler( t_handler, sig, info, uContext ) ) [[likely]]
return;
handler_variant fallback;
{
lock_guard lock( g_mtxFallback );
fallback = g_fallback;
}
callHandler( fallback, sig, info, uContext );
}
template<int SigNo>
bool signal_scope<SigNo>::callHandler( const handler_variant &handler,
int sig, siginfo_t *info, void *uContext )
{
if( holds_alternative<handler_fn>( handler ) ) [[likely]]
return get<handler_fn>( handler )( sig );
if( holds_alternative<siginfo_handler_fn>( handler ) ) [[likely]]
return get<siginfo_handler_fn>( handler )( sig, info, uContext );
return false;
}
* read in a list of words from a file here: https://people.sc.fsu.edu/~jburkardt/datasets/words/words.html
* pick N random words from that list and put them in an array (OK if
there are a few dupes - if you can get no dupes that's better)
* sort and print the array of randoms, adding a blank line each time the
1st letter changes
well, is that your code? Or the AI's code? Need to kick that AI to the
curb from time to time. Right?
Am 12.03.2026 um 21:18 schrieb DFS:if( size_t i = mt() % lines.size(); lines[i].size() )
* read in a list of words from a file here:
https://people.sc.fsu.edu/~jburkardt/datasets/words/words.html
* pick N random words from that list and put them in an array (OK if
there are a few dupes - if you can get no dupes that's better)
* sort and print the array of randoms, adding a blank line each time the
1st letter changes
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <random>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
size_t nLines;
if( argc < 3 || !(istringstream( argv[2] ) >> nLines) )
nLines = 200;
ifstream ifs( argv[1] );
vector<string> lines;
size_t iLine = 0;
for( string line; iLine < nLines && !ifs.eof(); ++iLine )
if( getline( ifs, line ) )
lines.emplace_back( line );
vector<string> rndLines;
mt19937_64 mt;
for( size_t n = 0; n < nLines; ++n )
if( size_t i = mt() % nLines; lines[i].size() )
rndLines.emplace_back( move( lines[i] ) );
sort( rndLines.begin(), rndLines.end() );
iLine = 0;
string *pPrev = nullptr;
for( string &rndLine : rndLines )
{
if( pPrev && tolower( rndLine[0] ) != tolower( pPrev->front() ) )
cout << endl;
cout << ++iLine << ". " << rndLine << endl;
pPrev = &rndLine;
}
}
39 lines vs. 72 lines, and much more readability on my side.
And your code may have duplicates in the random list which
is sorted afterwards.
Am 12.03.2026 um 21:18 schrieb DFS:
* read in a list of words from a file here:
https://people.sc.fsu.edu/~jburkardt/datasets/words/words.html
* pick N random words from that list and put them in an array (OK if
there are a few dupes - if you can get no dupes that's better)
* sort and print the array of randoms, adding a blank line each time the
1st letter changes
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <random>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
size_t nLines;
if( argc < 3 || !(istringstream( argv[2] ) >> nLines) )
nLines = 200;
ifstream ifs( argv[1] );
vector<string> lines;
size_t iLine = 0;
for( string line; iLine < nLines && !ifs.eof(); ++iLine )
if( getline( ifs, line ) )
lines.emplace_back( line );
vector<string> rndLines;
mt19937_64 mt;
for( size_t n = 0; n < nLines; ++n )
if( size_t i = mt() % nLines; lines[i].size() )
rndLines.emplace_back( move( lines[i] ) );
sort( rndLines.begin(), rndLines.end() );
iLine = 0;
string *pPrev = nullptr;
for( string &rndLine : rndLines )
{
if( pPrev && tolower( rndLine[0] ) != tolower( pPrev->front() ) )
cout << endl;
cout << ++iLine << ". " << rndLine << endl;
pPrev = &rndLine;
}
}
39 lines vs. 72 lines,
and much more readability on my side.
And your code may have duplicates in the random list which
is sorted afterwards.
Am 13.03.2026 um 09:48 schrieb Chris M. Thomasson:
On 3/12/2026 10:14 PM, Bonita Montero wrote:
The list was generated with ChatGpt.
Oh my. Beware, and always double, and triple, check its reams of code.
It's only word / telephone number tuples as .txt.
Am 13.03.2026 um 09:51 schrieb Chris M. Thomasson:
well, is that your code? Or the AI's code? Need to kick that AI to the
curb from time to time. Right?
AI never generates such dodgy ideas.
On 3/13/26 09:54, Bonita Montero wrote:
Am 13.03.2026 um 09:51 schrieb Chris M. Thomasson:
well, is that your code? Or the AI's code? Need to kick that AI to
the curb from time to time. Right?
AI never generates such dodgy ideas.
It does. ...
On 3/12/2026 8:54 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
I'll try that in C if you commit to trying the following in C++. Deal?A POSIX version:
-------------------------------------------------------------------------- >>> * read in a list of words from a file here:
https://people.sc.fsu.edu/~jburkardt/datasets/words/words.html
* pick N random words from that list and put them in an array (OK if
there are a few dupes - if you can get no dupes that's better)
* sort and print the array of randoms, adding a blank line each time the >>> 1st letter changes
(note: I just now wrote this. It's not old code.)
usage is
$./randwords filename N
$./randwords special_english.txt 200
-------------------------------------------------------------------------- >>
I took out all 7 instances of error trapping, and it throws:
On 3/13/2026 12:31 AM, DFS wrote:
On 3/12/2026 8:54 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
Put back in just the stat() call
stat(argv[1], &st);
and the program works again.
Question: wordlist was malloced with a size of 0:
wordlist = malloc(wordcount * sizeof(const char *));
Why are you allowed to malloc size 0?
DFS <nospam@dfs.com> writes:
On 3/13/2026 12:31 AM, DFS wrote:
On 3/12/2026 8:54 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
Put back in just the stat() call
stat(argv[1], &st);
and the program works again.
Yes, you can't willy-nilly remove lines from a program.
Question: wordlist was malloced with a size of 0:
wordlist = malloc(wordcount * sizeof(const char *));
Why are you allowed to malloc size 0?
"If the size of the space requested is 0, the behavior is
implementation-defined: either a null pointer shall be returned,
or the behavior shall be as if the size were some non-zero value,
except that the behavior is undefined if the returned pointer is
used to access an object."
https://pubs.opengroup.org/onlinepubs/9799919799/functions/malloc.html
Am 12.03.2026 um 15:43 schrieb DFS:
On 3/12/2026 10:13 AM, Bonita Montero wrote:
Here, do that in C:
static optional<size_t> parse( const char *str )
{
size_t ret;
if( from_chars_result fcr = from_chars( str, str +
strlen( str ), ret ); (bool)fcr.ec || *fcr.ptr )
return nullopt;
return ret;
}
Explain in detail what it does.
If that isn't self-explanatory stick with C.
I've got a task for you: Do the same in C:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <vector>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( "^\\s*\"([^\"]*)\"\\s*\"([^\"]*)\"\\s*$" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
while( !ifs.eof() )
{
string line;
getline( ifs, line );
match_results<string::const_iterator> sm;
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( string( sm[1].first, sm[1].second ), string( sm[2].first, sm[2].second ) );
}
sort( phoneList.begin(), phoneList.end(),
[]( const name_tel &left, const name_tel &right ) { return left.name < right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\"" <<
endl;
}
1. Read a file and parse it with ne mentioned regex-pattern.
2. Split both parts of every line in two strings.
3. Sort the "vector" according to the first string.
4. Print it.
I guess you don't manage to do that with less than five times the work.
I guess you don't manage to do that with less than five times the work.Every external lib allowed.
This stuff is child's play with any scripting language. The code will
also be cleaner and simpler.
Here you're trying to write C++ as though it was a scripting language,
by utilising its many bundled libraries, but it fails badly.
There is too much excruciating detail that you still have to write.
Even the easy bit, printing the sorted list, is a mess of punctuation.
While your compare function:
[]( const name_tel &left, const name_tel &right ) { return left.name
< right.name; }
would be '{l,r: l.name < r.name}'
If external libraries are allowed then C wouldn't be five times the line count. They just don't come as standard.
Am 12.03.2026 um 18:32 schrieb Scott Lurndal:
You didn't bother to check if the file was opened.
That's not necessary to compare it against a equal solution in C.
This is certainly a task that can be done simply using a shell
script and the standard POSIX utility set. awk(1) would be
a good starting point; it's possible that it can be done with
a single invocation of the posix 'sort' utility.
It's comparison of C against C++.
Of course, the lack of any in-line documentation (e.g. comments) is
a typical defect in your C++ code.
Complete idiot.
Am 13.03.2026 um 10:29 schrieb Janis Papanagnou:
On 3/13/26 09:54, Bonita Montero wrote:
Am 13.03.2026 um 09:51 schrieb Chris M. Thomasson:
well, is that your code? Or the AI's code? Need to kick that AI to
the curb from time to time. Right?
AI never generates such dodgy ideas.
It does. ...
No, AI doesn't even understand what I did there so that I had to correct
the code while reviewing it through AI. Redirecting signals to threads
is really uncommon.
On 3/13/2026 2:33 AM, Bonita Montero wrote:
Am 13.03.2026 um 10:29 schrieb Janis Papanagnou:
On 3/13/26 09:54, Bonita Montero wrote:
Am 13.03.2026 um 09:51 schrieb Chris M. Thomasson:
well, is that your code? Or the AI's code? Need to kick that AI to
the curb from time to time. Right?
AI never generates such dodgy ideas.
It does. ...
No, AI doesn't even understand what I did there so that I had to correct
the code while reviewing it through AI. Redirecting signals to threads
is really uncommon.
At least you can take a look at the AI reams o' code, and correct it.
Am 13.03.2026 um 09:48 schrieb Chris M. Thomasson:
On 3/12/2026 10:14 PM, Bonita Montero wrote:
Am 13.03.2026 um 02:19 schrieb Janis Papanagnou:
This is an interesting statement. - I first saw the typical
"Max Mustermann" that is in our country the prototype of an
artificial test entry with obvious test number 0123-4567890.
Then I picked an arbitrary entry and looked it up; I found
that person with exactly the associated telephone number. -
Where did you get these real names from?
The list was generated with ChatGpt.
Oh my. Beware, and always double, and triple, check its reams of code.
It's only word / telephone number tuples as .txt.
Am 13.03.2026 um 18:32 schrieb Bart:
This stuff is child's play with any scripting language. The code will
also be cleaner and simpler.
Also the C programms here that do similar things. But if your require-
ment is systems programming or performance you won't chose a scripting language.
You're really narrow-minded if you see it so superficially.
Here you're trying to write C++ as though it was a scripting language,
by utilising its many bundled libraries, but it fails badly.
No, the code works correctly according to the requirements.
There is too much excruciating detail that you still have to write.
Even the easy bit, printing the sorted list, is a mess of punctuation.
You're focussed on details without seeing the concept.
While your compare function:
[]( const name_tel &left, const name_tel &right ) { return
left.name < right.name; }
would be '{l,r: l.name < r.name}'
Yes, with one percent of the performance.
I shortened my code a bit.
Do that:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( R"~(^\s*"([^"]*)"\s*"([^"]*)"\s*$)~" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
smatch sm;
for( string line; getline( ifs, line ); )
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( sm[1].str(), sm[2].str() );
sort( phoneList.begin(), phoneList.end(),
[]( name_tel &left, name_tel &right ) { return left.name < right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\"" <<
endl;
}
Have a closer look at the regex.
::construct(std::allocator_traits<std::allocator<_CharT>std::allocator<char> >}; _Tp = main(int, char**)::name_tel; std::allocator_traits<std::allocator<_CharT> >::allocator_type = std::allocator<main(int, char**)::name_tel>]’ /usr/include/c++/11/bits/vector.tcc:115:30: required from ‘std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {std::__cxx11::basic_string<char, std::char_traits<char>,
::allocator_type&, _Up*, _Args&& ...) [with _Up = main(int, char**)::name_tel; _Args = {std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>,
Question: wordlist was malloced with a size of 0:
wordlist = malloc(wordcount * sizeof(const char *));
Why are you allowed to malloc size 0?
On 3/12/2026 8:27 PM, Bonita Montero wrote:
I shortened my code a bit.
Do that:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( R"~(^\s*"([^"]*)"\s*"([^"]*)"\s*$)~" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
smatch sm;
for( string line; getline( ifs, line ); )
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( sm[1].str(), sm[2].str() ); >> sort( phoneList.begin(), phoneList.end(),
[]( name_tel &left, name_tel &right ) { return left.name < >> right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\""
<< endl;
}
Have a closer look at the regex.
Got a massive set of compile errors:--- Synchronet 3.21d-Linux NewsLink 1.2
= -std=c++20
BTW that C++ test, based on your small example, took 3 seconds
to compile. It must be pulling in a huge amount of stuff.
On 3/12/2026 11:48 AM, Bonita Montero wrote:
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
Better take this:
That's 50 in and 50 out.
So what's the RegEx for?
Am 13.03.2026 um 21:11 schrieb DFS:
On 3/12/2026 8:27 PM, Bonita Montero wrote:
I shortened my code a bit.
Do that:
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( R"~(^\s*"([^"]*)"\s*"([^"]*)"\s*$)~" ); >>> struct name_tel { string name, tel; };
vector<name_tel> phoneList;
smatch sm;
for( string line; getline( ifs, line ); )
if( regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( sm[1].str(), sm[2].str() );
sort( phoneList.begin(), phoneList.end(),
[]( name_tel &left, name_tel &right ) { return left.name <
right.name; } );
for( name_tel &phone : phoneList )
cout << "\"" << phone.name << "\"\t\"" << phone.tel << "\""
<< endl;
}
Have a closer look at the regex.
Got a massive set of compile errors:
= -std=c++20
Your output is messy. What you want to do is iterate the data and find
the longest name, then pad spaces after the name so the phone numbers
line up.
88 LOC
16.84MB executable
perfect output
$ ./dfs names-numbers-unsorted-montero.txt
1. Anna Becker 0170-2233445
...
Am 14.03.2026 um 06:49 schrieb DFS:
Your output is messy. What you want to do is iterate the data and
find the longest name, then pad spaces after the name so the phone
numbers line up.
No, alphabetically ordered. My code exactly does that.
88 LOC
16.84MB executable
perfect output
$ ./dfs names-numbers-unsorted-montero.txt
1. Anna Becker 0170-2233445
...
My output is the same without line numbers:
"Anna Fischer" "0341-9988776"....
"Anna Müller" "0987-6543210"
"Ben Meier" "0341-5566778"
"Ben Richter" "069-3344556"
"Clara Hofmann" "0157-2233445"
"Clara Zimmermann" "040-5566778"
"Tom Bauer" "0171-1122334"
Eh, 35 lines. I forget to remove namePad.
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iomanip>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( R"~(^\s*"([^"]*)"\s*"([^"]*)"\s*$)~" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
smatch sm;
for( string line; !ifs.eof(); )
if( getline( ifs, line ) && regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( sm[1].str(), sm[2].str() );
if( !phoneList.size() )
return EXIT_FAILURE;
sort( phoneList.begin(), phoneList.end(),
[]( name_tel &left, name_tel &right ) { return left.name < right.name; } );
size_t
maxName = max_element( phoneList.begin(), phoneList.end(),
[]( name_tel &a, name_tel &b ) { return a.name.length() <
b.name.length(); } )->name.length(),
maxLineNo = (ostringstream() << phoneList.size()).rdbuf()->view().size();
string totalNamePad( maxName, ' ' );
size_t iLine = 1;
for( name_tel &phone : phoneList )
cout << setw( maxLineNo ) << right << iLine++ << ". " << setw(
maxName ) << left << phone.name << " " << phone.tel << endl;
}
You didn't handle multibyte characters like ä and ö and ü, so the phone numbers aren't lined up.
Am 14.03.2026 um 08:05 schrieb DFS:
You didn't handle multibyte characters like ä and ö and ü, so the
phone numbers aren't lined up.
No, my output is correct. Don't trust the console,
print everyhting into a file with "xxx yyy > filename".
Am 13.03.2026 um 02:22 schrieb DFS:
On 3/12/2026 11:48 AM, Bonita Montero wrote:
Am 12.03.2026 um 16:22 schrieb DFS:
Give me the file you used.
Better take this:
That's 50 in and 50 out.
So what's the RegEx for?
For each line:
1. Skip as many whitespace as possible.
2. Match '"'.
3. Read name until a '"' comes.
4. Match '"'.
5. Skip as many whitespace as possible.
6. Repeat step 1 to 5 for the telephone number.
7. Match line end.
7. Store name and telephone number in a list.
8. Sort the list according to the name.
9. Print each entry.
Your output is most definitely NOT correct.The problem is that setw( xxx ) on cout doesn't support UTF-8 strings
On 3/14/2026 3:10 AM, Bonita Montero wrote:
Am 14.03.2026 um 08:05 schrieb DFS:
You didn't handle multibyte characters like ä and ö and ü, so the
phone numbers aren't lined up.
No, my output is correct. Don't trust the console,
print everyhting into a file with "xxx yyy > filename".
Your output is most definitely NOT correct.
Mine is, though. See the setspacing() function to see how I did it.
Eh, 35 lines. I forget to remove namePad.
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iomanip>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( R"~(^\s*"([^"]*)"\s*"([^"]*)"\s*$)~" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
smatch sm;
for( string line; !ifs.eof(); )
if( getline( ifs, line ) && regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( sm[1].str(), sm[2].str() );
if( !phoneList.size() )
return EXIT_FAILURE;
sort( phoneList.begin(), phoneList.end(),
[]( name_tel &left, name_tel &right ) { return left.name < right.name; } );
size_t
maxName = max_element( phoneList.begin(), phoneList.end(),
[]( name_tel &a, name_tel &b ) { return a.name.length() <
b.name.length(); } )->name.length(),
maxLineNo = (ostringstream() << phoneList.size()).rdbuf()-
view().size();string totalNamePad( maxName, ' ' );
size_t iLine = 1;
for( name_tel &phone : phoneList )
cout << setw( maxLineNo ) << right << iLine++ << ". " << setw( maxName ) << left << phone.name << " " << phone.tel << endl;
}
In terms of file size, it's about 1.9:1. Both use spaced indents. If I
get rid of leading white space (and some trailing white space in DFS version), then difference is 1.7:1.
Your C++ always looks like total gobbledygook.
As for binary sizes, those aren't so interesting: using -Os -s:
BM: 87KB
DFS: 50KB
BM g++-Os -s 4.9 seconds, or 6.5 lps
DFS gcc-Os -s 0.3 seconds, or 270 lps
Both are poor frankly, but the C++ was still significantly slower.
Eh, 35 lines. I forget to remove namePad.
#include <iostream>
#include <regex>
#include <fstream>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iomanip>
using namespace std;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
ifstream ifs( argv[1] );
static regex rxNameTel( R"~(^\s*"([^"]*)"\s*"([^"]*)"\s*$)~" );
struct name_tel { string name, tel; };
vector<name_tel> phoneList;
smatch sm;
for( string line; !ifs.eof(); )
if( getline( ifs, line ) && regex_match( line, sm, rxNameTel ) )
phoneList.emplace_back( sm[1].str(), sm[2].str() );
if( !phoneList.size() )
return EXIT_FAILURE;
sort( phoneList.begin(), phoneList.end(),
[]( name_tel &left, name_tel &right ) { return left.name < right.name; } );
size_t
maxName = max_element( phoneList.begin(), phoneList.end(),
[]( name_tel &a, name_tel &b ) { return a.name.length() <
b.name.length(); } )->name.length(),
maxLineNo = (ostringstream() << phoneList.size()).rdbuf()-
view().size();string totalNamePad( maxName, ' ' );
size_t iLine = 1;
for( name_tel &phone : phoneList )
cout << setw( maxLineNo ) << right << iLine++ << ". " << setw( maxName ) << left << phone.name << " " << phone.tel << endl;
cout << setw( maxLineNo ) << right << iLine++ << ". " <<
setw( maxName ) << left << phone.name << " " << phone.tel << endl;
In C it would be:
printf("%*d. %-*s %s\n", maxLineNo, iLine++, maxName, phone.name, phone.tel);
For that matter, this can be done in C++ too, but you chose the most long-winded syntax possible.
You seem incapable of using a simple approach when a more complicated exists! You don't /have/ to use every possible feature you know.
Am 14.03.2026 um 17:22 schrieb Bart:
cout << setw( maxLineNo ) << right << iLine++ << ". " <<
setw( maxName ) << left << phone.name << " " << phone.tel << endl;
In C it would be:
printf("%*d. %-*s %s\n", maxLineNo, iLine++, maxName, phone.name,
phone.tel);
With dynamic widths according to the maximum length of name and tel ?
For that matter, this can be done in C++ too, but you chose the most
long-winded syntax possible.
With additional flexibility as I said, and type-safe;
printf() is sick in that sense.
Yes that's one of my many criticisms of it: you have to get those %d and
%s formats right. But given that, here it does the job.
scott@slp53.sl.home (Scott Lurndal) writes:
DFS <nospam@dfs.com> writes:
On 3/13/2026 12:31 AM, DFS wrote:
On 3/12/2026 8:54 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
Put back in just the stat() call
stat(argv[1], &st);
and the program works again.
Yes, you can't willy-nilly remove lines from a program.
Question: wordlist was malloced with a size of 0:
wordlist = malloc(wordcount * sizeof(const char *));
Why are you allowed to malloc size 0?
"If the size of the space requested is 0, the behavior is
implementation-defined: either a null pointer shall be returned,
or the behavior shall be as if the size were some non-zero value,
except that the behavior is undefined if the returned pointer is
used to access an object."
https://pubs.opengroup.org/onlinepubs/9799919799/functions/malloc.html
IIRC, this caveat was added due to differences in the malloc(3) implementations for System V Unix and BSD Unix.
DFS <nospam@dfs.com> writes:
...
Question: wordlist was malloced with a size of 0:
wordlist = malloc(wordcount * sizeof(const char *));
Why are you allowed to malloc size 0?
Because, in some contexts, it's convenient to allow a mixture of
0-sized and non-zero sized allocations, depending upon the value of
a variable, so some pre-standard versions of malloc() supported
malloc(0) returning a unique pointer to memory that could not be
accessed. The uniqueness of the pointer allowed the value of the
pointer to be used as an identifier for the thing that might or
might not have been allocated.
This was a sufficiently common feature that the C committee decided
to allow it, but sufficiently rare that the C committee decided not
to mandate it.
scott@slp53.sl.home (Scott Lurndal) writes:
scott@slp53.sl.home (Scott Lurndal) writes:
DFS <nospam@dfs.com> writes:
On 3/13/2026 12:31 AM, DFS wrote:
On 3/12/2026 8:54 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
I took out all 7 instances of error trapping, and it throws:
Floating point exception (core dumped)
at line 61 or 62
Put back in just the stat() call
stat(argv[1], &st);
and the program works again.
Yes, you can't willy-nilly remove lines from a program.
Question: wordlist was malloced with a size of 0:
wordlist = malloc(wordcount * sizeof(const char *));
Why are you allowed to malloc size 0?
"If the size of the space requested is 0, the behavior is
implementation-defined: either a null pointer shall be returned,
or the behavior shall be as if the size were some non-zero value,
except that the behavior is undefined if the returned pointer is
used to access an object."
https://pubs.opengroup.org/onlinepubs/9799919799/functions/malloc.html
IIRC, this caveat was added due to differences in the malloc(3)
implementations for System V Unix and BSD Unix.
That sounds right, except I might say "put in" rather than "added"
since as best I can determine that allowance was present in the
earliest drafts of the C standard and POSIX discussions.
Note that malloc() is not mentioned in K&R, and apparently was
added to AT&T Unix in Unix V7. The timing on that was about the
same time as the first BSD Unix.
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
Your C++ always looks like total gobbledygook.
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research
and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
But for my money, python is the best tradeoff of ease of use,
readability, speed and functionality.
read file in
clean data (remove whitespace, remove quote marks)
sort
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research
and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
and this is what it looks like:
=================================================================================
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <regex>
#include <iomanip>
using namespace std;
int main(int argc, char* argv[]) {
vector<string> lines; // store all lines
string line; // store one line
regex re_strip ("^\\s+|\\s+$"); // regex to strip whitespace
regex re_quotes ("\""); // regex to remove quote marks
ifstream file(argv[1]); // Open the file
while (getline(file, line)) { // read lines, clean, add to array
line = regex_replace(line, re_strip , "");
line = regex_replace(line, re_quotes, "");
lines.push_back(line);
}
file.close();
sort(lines.begin(), lines.end());
int i = 0;
for (const string& stored_line : lines) {
cout << right << setw(2) << ++i << ". " << stored_line << endl;
}
return 0;
}
=================================================================================
fairly hideous to look at, and 6 includes required?
Even with the -Os compile flag you mentioned, the executable is 101MB. That's crazy for that tiny bit of functionality.
But for this little code, C++ does some nice things for you: memory management,
easier regex usage, and easier sorting.
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
DFS <nospam@dfs.com> writes:
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research
and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
Technically, your example removes _leading_ and _trailing_
whitespace, no?
$ sed -e 's/^[ \t]*//;s/[ \t]*$//' -e 's/"//' < inputfile | sort
But for my money, python is the best tradeoff of ease of use,
readability, speed and functionality.
For functionality that can be composed from standard command
line utilities, even python loses.
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
On 3/16/2026 6:26 PM, Richard Harnden wrote:
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
output:
AnnaBecker0170-2233445
AnnaFischer0341-9988776
AnnaMüller0987-6543210
BenMeier0341-5566778
BenRichter069-3344556
ClaraHofmann0157-2233445
Me know that not right!
On 16/03/2026 23:09, DFS wrote:
On 3/16/2026 6:26 PM, Richard Harnden wrote:
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
output:
AnnaBecker0170-2233445
AnnaFischer0341-9988776
AnnaMüller0987-6543210
BenMeier0341-5566778
BenRichter069-3344556
ClaraHofmann0157-2233445
Me know that not right!
Yes. You and Bart are right.
On 3/16/2026 7:17 PM, Richard Harnden wrote:
On 16/03/2026 23:09, DFS wrote:
On 3/16/2026 6:26 PM, Richard Harnden wrote:
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
output:
AnnaBecker0170-2233445
AnnaFischer0341-9988776
AnnaMüller0987-6543210
BenMeier0341-5566778
BenRichter069-3344556
ClaraHofmann0157-2233445
Me know that not right!
Yes. You and Bart are right.
Can you make it look like:
Anna Becker 0170-2233445
Anna Fischer 0341-9988776
Anna Müller 0987-6543210
Ben Meier 0341-5566778
Ben Richter 069-3344556
Clara Hofmann 0157-2233445
On 16/03/2026 20:43, DFS wrote:
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research
and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
and this is what it looks like:
=================================================================================
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <regex>
#include <iomanip>
using namespace std;
int main(int argc, char* argv[]) {
vector<string> lines; // store all lines >> string line; // store one line
regex re_strip ("^\\s+|\\s+$"); // regex to strip whitespace
regex re_quotes ("\""); // regex to remove quote marks
ifstream file(argv[1]); // Open the file
while (getline(file, line)) { // read lines, clean, add to array
line = regex_replace(line, re_strip , "");
line = regex_replace(line, re_quotes, "");
lines.push_back(line);
}
file.close();
sort(lines.begin(), lines.end());
int i = 0;
for (const string& stored_line : lines) {
cout << right << setw(2) << ++i << ". " << stored_line << endl;
}
return 0;
}
This works very differently:
Perhaps it's not as simple as it seemed! Did your Python version do the
same as this?
It's a rather fiddly spec for me to bother doing it properly, but I
threw something together in my scripting language which is shown below.
It shows the equivalent of BM's output.
But it cuts some corners: the column widths of the output are hardcoded (easy to fix but untidy), and it uses a custom sort routine (not shown,
but is a bubble sort), as I don't have a ready-made library routine that takes a compare function.
=================================================================================
fairly hideous to look at, and 6 includes required?
Even with the -Os compile flag you mentioned, the executable is 101MB.
That's crazy for that tiny bit of functionality.
But for this little code, C++ does some nice things for you: memory
management,
Yeah. But in a language like C (or my static one), I'd just use a
slightly different approach. Maybe an extra pass over the data to
establish the size of the table, then you just allocate it all at once.
(My compiler project never actually free any memory!)
easier regex usage, and easier sorting.
I've never used regex. As you can see from my example, decent i/o
routines can eliminate the need.
Sorting though can get complex. You'd need a lot more than Sort with a compare function to work with real phone data.
-------------------------
record rec =
var name, tel
end
f:=openfile(sread("n"))
phonelist::=()
while not eof(f) do
readln @f, name:"s", tel:"s"
nextloop when name=""
phonelist &:= rec(name, tel)
end
closefile(f)
sort(phonelist)
for i,x in phonelist do
fprintln "#. # #", i:"3", x.name:"15jl", x.tel
end
------------------
(The "s" input format reads case-preserved names or files. Inputs can be quoted to allow embedded separators within context. Quotes are
discarded. White space around items is skipped anyway.
Here also, names can have embedded quotes, but they need to be doubled up:
"White ""House""" -> White "House")
On 16/03/2026 23:21, DFS wrote:
On 3/16/2026 7:17 PM, Richard Harnden wrote:
On 16/03/2026 23:09, DFS wrote:
On 3/16/2026 6:26 PM, Richard Harnden wrote:
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
output:
AnnaBecker0170-2233445
AnnaFischer0341-9988776
AnnaMüller0987-6543210
BenMeier0341-5566778
BenRichter069-3344556
ClaraHofmann0157-2233445
Me know that not right!
Yes. You and Bart are right.
Can you make it look like:
Anna Becker 0170-2233445
Anna Fischer 0341-9988776
Anna Müller 0987-6543210
Ben Meier 0341-5566778
Ben Richter 069-3344556
Clara Hofmann 0157-2233445
Kinda ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt \ |
sort -t\| -k1 \ |
tr -d "|" \ |
head
Anna Becker 0170-2233445
Anna Fischer 0341-9988776
Anna Müller 0987-6543210
Ben Meier 0341-5566778
Ben Richter 069-3344556
Clara Hofmann 0157-2233445
Clara Zimmermann 040-5566778
David Schulz 030-9988776
Emma Bauer 0157-9988776
Emma Wolf 040-5566778
... it's not pretty.
On 3/16/2026 6:26 PM, Bart wrote:
This works very differently:
It wasn't meant to duplicate Montero's.
It was meant as a demo of how C++ looks by default.
This is a python implementation of the 'quicksort' algorithm I found
online.
# Simplified in-place QuickSort in Python
def quick_sort(array, low, high):
if low < high:
pi = partition(array, low, high)
quick_sort(array, low, pi - 1)
quick_sort(array, pi + 1, high)
def partition(array, low, high):
pivot = array[high]
i = low - 1
for j in range(low, high):
if array[j] <= pivot:
i += 1
array[i], array[j] = array[j], array[i]
array[i + 1], array[high] = array[high], array[i + 1]
return i + 1
I assume you could easily port it to BartScript?
"White ""House""" -> White "House")
How can we run a BartScript program?
I assume you rarely reply with C code because of the coding time?
Python takes me 1/5th to 1/3rd the time of C to write. Fantastic
language in many ways.
On 3/16/2026 4:57 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research
and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
Technically, your example removes _leading_ and _trailing_
whitespace, no?
Yes.
$ sed -e 's/^[ \t]*//;s/[ \t]*$//' -e 's/"//' < inputfile | sort
This is what I got from that:
Anna Becker" "0170-2233445"
Anna Fischer" "0341-9988776"
Anna Müller" "0987-6543210"
Ben Meier" "0341-5566778"
Ben Richter" "069-3344556"
...
Not what we're looking for.
But for my money, python is the best tradeoff of ease of use,
readability, speed and functionality.
For functionality that can be composed from standard command
line utilities, even python loses.
Sure, but sed and regex are inscrutable.
original Montero file
"Max Mustermann" "0123-4567890"
"Anna Müller" "0987-6543210"
"Peter Schmidt" "030-1234567"
"Laura Fischer" "040-9876543"
"Tim Becker" "0151-1112223"
"Julia Neumann" "0221-3344556"
"Michael Braun" "0170-9988776"
"Sophie Wagner" "089-2233445"
"Felix Hoffmann" "0711-5566778"
"Lea Richter" "0341-1122334"
"Jonas Klein" "030-4455667"
"Emma Wolf" "040-5566778"
"Lukas König" "069-7788990"
"Clara Hofmann" "0157-2233445"
"Paul Schäfer" "0228-3344556"
"Mia Keller" "089-6677889"
"Leon Zimmermann" "0711-1122445"
"Nina Krause" "0341-4455667"
"David Schulz" "030-9988776"
"Sarah Lehmann" "040-2233445"
"Ben Richter" "069-3344556"
"Hannah Wagner" "0221-5566778"
"Tom Bauer" "0171-1122334"
"Lena Fischer" "0151-6677889"
"Simon Meier" "030-7788990"
"Marie Becker" "040-4455667"
"Jan Hoffmann" "0711-3344556"
"Leonie Klein" "0341-5566778"
"Philipp König" "089-9988776"
"Laura Schulze" "0228-1122334"
"Moritz Wolf" "0157-4455667"
"Jana Zimmer" "030-6677889"
"Felix Neumann" "0170-2233445"
"Sarah Braun" "040-7788990"
"Tim Schäfer" "0711-5566778"
"Anna Fischer" "0341-9988776"
"Maximilian Keller" "030-1122334"
"Lea Wagner" "0151-3344556"
"Lukas Hofmann" "089-6677889"
"Marie Richter" "0221-7788990"
"Jonas Klein" "0171-4455667"
"Clara Zimmermann" "040-5566778"
"Paul Wolf" "030-2233445"
"Sophie Neumann" "0711-3344556"
"Ben Meier" "0341-5566778"
"Emma Bauer" "0157-9988776"
"Leon Krause" "089-1122334"
"Julia Schulz" "0228-4455667"
"Tim Richter" "030-6677889"
"Anna Becker" "0170-2233445"
This 12-line python reads the file, strips leading/trailing spaces,
removes quote marks, determines the longest name for spacing (if new
longer names are added it still pretty-prints), and outputs
first-last-phone numbered and aligned and sorted by last-first.
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2]))
1. Emma Bauer 0157-9988776
2. Tom Bauer 0171-1122334
3. Anna Becker 0170-2233445
4. Marie Becker 040-4455667
5. Tim Becker 0151-1112223
6. Michael Braun 0170-9988776
7. Sarah Braun 040-7788990
8. Anna Fischer 0341-9988776
9. Laura Fischer 040-9876543
10. Lena Fischer 0151-6677889
11. Felix Hoffmann 0711-5566778
12. Jan Hoffmann 0711-3344556
13. Clara Hofmann 0157-2233445
14. Lukas Hofmann 089-6677889
15. Maximilian Keller 030-1122334
16. Mia Keller 089-6677889
17. Jonas Klein 0171-4455667
18. Jonas Klein 030-4455667
19. Leonie Klein 0341-5566778
20. Leon Krause 089-1122334
21. Nina Krause 0341-4455667
22. Lukas König 069-7788990
23. Philipp König 089-9988776
24. Sarah Lehmann 040-2233445
25. Ben Meier 0341-5566778
26. Simon Meier 030-7788990
27. Max Mustermann 0123-4567890
28. Anna Müller 0987-6543210
29. Felix Neumann 0170-2233445
30. Julia Neumann 0221-3344556
31. Sophie Neumann 0711-3344556
32. Ben Richter 069-3344556
33. Lea Richter 0341-1122334
34. Marie Richter 0221-7788990
35. Tim Richter 030-6677889
36. Peter Schmidt 030-1234567
37. David Schulz 030-9988776
38. Julia Schulz 0228-4455667
39. Laura Schulze 0228-1122334
40. Paul Schäfer 0228-3344556
41. Tim Schäfer 0711-5566778
42. Hannah Wagner 0221-5566778
43. Lea Wagner 0151-3344556
44. Sophie Wagner 089-2233445
45. Emma Wolf 040-5566778
46. Moritz Wolf 0157-4455667
47. Paul Wolf 030-2233445
48. Jana Zimmer 030-6677889
49. Clara Zimmermann 040-5566778
50. Leon Zimmermann 0711-1122445
If you can do that with sed and regex and pipes in 1 or 2 lines, I'm
gonna hurl.
On 16/03/2026 23:21, DFS wrote:
On 3/16/2026 7:17 PM, Richard Harnden wrote:
On 16/03/2026 23:09, DFS wrote:
On 3/16/2026 6:26 PM, Richard Harnden wrote:
On 16/03/2026 20:43, DFS wrote:
read file in
clean data (remove whitespace, remove quote marks)
sort
---
#!/bin/ksh
while read
do
echo ${REPLY//[ \"]/}
done | sort
---
[After Scott's sed]
output:
AnnaBecker0170-2233445
AnnaFischer0341-9988776
AnnaMüller0987-6543210
BenMeier0341-5566778
BenRichter069-3344556
ClaraHofmann0157-2233445
Me know that not right!
Yes. You and Bart are right.
Can you make it look like:
Anna Becker 0170-2233445
Anna Fischer 0341-9988776
Anna Müller 0987-6543210
Ben Meier 0341-5566778
Ben Richter 069-3344556
Clara Hofmann 0157-2233445
Kinda ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt \ |
sort -t\| -k1 \ |
tr -d "|" \ |
head
Anna Becker 0170-2233445
Anna Fischer 0341-9988776
Anna Müller 0987-6543210
Ben Meier 0341-5566778
Ben Richter 069-3344556
Clara Hofmann 0157-2233445
Clara Zimmermann 040-5566778
David Schulz 030-9988776
Emma Bauer 0157-9988776
Emma Wolf 040-5566778
... it's not pretty.
I split the long line and got the '| \' backwards :(
Second attempt, this time with line numbers ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt | \
sort -t\| -k1 | \
awk -F\| 'BEGIN {i=1} {printf("%2d. %-20s|%s\n", i,$1,$2); i++}' | \
tr -d "|"
1. Anna Becker 0170-2233445
2. Anna Fischer 0341-9988776
3. Anna Müller 0987-6543210
4. Ben Meier 0341-5566778
5. Ben Richter 069-3344556
[...]
47. Tim Becker 0151-1112223
48. Tim Richter 030-6677889
49. Tim Schäfer 0711-5566778
50. Tom Bauer 0171-1122334
On 16/03/2026 23:07, DFS wrote:
On 3/16/2026 4:57 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research >>>> and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
Technically, your example removes _leading_ and _trailing_
whitespace, no?
Yes.
$ sed -e 's/^[ \t]*//;s/[ \t]*$//' -e 's/"//' < inputfile | sort
This is what I got from that:
Anna Becker" "0170-2233445"
Anna Fischer" "0341-9988776"
Anna Müller" "0987-6543210"
Ben Meier" "0341-5566778"
Ben Richter" "069-3344556"
...
Not what we're looking for.
But for my money, python is the best tradeoff of ease of use,
readability, speed and functionality.
For functionality that can be composed from standard command
line utilities, even python loses.
Sure, but sed and regex are inscrutable.
original Montero file
[...]
This 12-line python reads the file, strips leading/trailing spaces,
removes quote marks, determines the longest name for spacing (if new
longer names are added it still pretty-prints), and outputs
first-last-phone numbered and aligned and sorted by last-first.
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2]))
[...]
If you can do that with sed and regex and pipes in 1 or 2 lines, I'm
gonna hurl.
I can do something that works with the provided data with util-linux,
sed, awk, and coreutils:
{ sed 's/"//g' | awk '{print $1 " " $2 ":" $3}' | sort -k2 | column -t
-s: | nl -s". " ; } < data
where the file "data" contains the original list. Although there will beI've lost track of what the actual requirements meanwhile are.
some differences in behaviour for some other data and I think your
python and the above will do unintuitive things with other data.
The pipeline goes as follows:
sed strips quotes
awk makes two delimited columns
sort sorts on surnames, and where surnames are identical on telno
column presents columns with a visual model
nl adds number prefixes
If I really try and use tee and fifos I can use many fewer tools.
None of these are really good languages/toolsets for the task. A DSL
with a data schema would be best.
(Also I quite like using bubble sort because so many deride it!)
Not sure you can fix it - it's just how C++ looks.
fairly hideous to look at, and 6 includes required?
Even with the -Os compile flag you mentioned, the executable is 101MB.
On 3/16/2026 8:09 PM, Richard Harnden wrote:
I split the long line and got the '| \' backwards :(
Second attempt, this time with line numbers ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt | \
sort -t\| -k1 | \
awk -F\| 'BEGIN {i=1} {printf("%2d. %-20s|%s\n", i,$1,$2); i++}' | \
tr -d "|"
1. Anna Becker 0170-2233445
2. Anna Fischer 0341-9988776
3. Anna Müller 0987-6543210
4. Ben Meier 0341-5566778
5. Ben Richter 069-3344556
[...]
47. Tim Becker 0151-1112223
48. Tim Richter 030-6677889
49. Tim Schäfer 0711-5566778
50. Tom Bauer 0171-1122334
Nice.
Next things to try are:
1) continue to output first-last name, but sort by last name-first name
2) instead of hard-coding 20, have it determine the longest name in the
input, and adjust the spacing so it always pretty-prints with no name
collision into the phone number column.
The python 12-liner for that is:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2]))
1. Emma Bauer 0157-9988776
2. Tom Bauer 0171-1122334
[...]
49. Clara Zimmermann 040-5566778
50. Leon Zimmermann 0711-1122445
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride it!)
How miserable! (I feel so sorry for you.)
On 17/03/2026 01:45, DFS wrote:
On 3/16/2026 8:09 PM, Richard Harnden wrote:
I split the long line and got the '| \' backwards :(
Second attempt, this time with line numbers ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt | \
sort -t\| -k1 | \
awk -F\| 'BEGIN {i=1} {printf("%2d. %-20s|%s\n", i,$1,$2); i++}' >>> | \
tr -d "|"
1. Anna Becker 0170-2233445
2. Anna Fischer 0341-9988776
3. Anna Müller 0987-6543210
4. Ben Meier 0341-5566778
5. Ben Richter 069-3344556
[...]
47. Tim Becker 0151-1112223
48. Tim Richter 030-6677889
49. Tim Schäfer 0711-5566778
50. Tom Bauer 0171-1122334
Nice.
Next things to try are:
1) continue to output first-last name, but sort by last name-first name
2) instead of hard-coding 20, have it determine the longest name in the
input, and adjust the spacing so it always pretty-prints with no name >> collision into the phone number column.
The python 12-liner for that is:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2]))
1. Emma Bauer 0157-9988776
2. Tom Bauer 0171-1122334
[...]
49. Clara Zimmermann 040-5566778
50. Leon Zimmermann 0711-1122445
18 line ksh ...
#!/bin/kshA version in GNU Awk (this time sorted on given name for simplicity):
typeset -i MAX=$(
while read FIRST LAST PHONE
do
NAME="${FIRST} ${LAST}"
echo ${#NAME}
done <input.txt |\
sort -nr |\
head -1
)
tr -d \" <input.txt |\
awk -v MAX=${MAX} '{printf("%-*s%s\n", MAX, $1" "$2, $3)}' |\
sort -k2 -k1 |\
nl -w2 -s". "
return 0
Output:
[...]
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is small
fraction of overall build-time of a library exporting 1000 functions.
For a mere 100 functions, sort time is negligible.
On 2026-03-17 11:42, Richard Harnden wrote:
On 17/03/2026 01:45, DFS wrote:
On 3/16/2026 8:09 PM, Richard Harnden wrote:
I split the long line and got the '| \' backwards :(
Second attempt, this time with line numbers ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt | \
sort -t\| -k1 | \
awk -F\| 'BEGIN {i=1} {printf("%2d. %-20s|%s\n", i,$1,$2); i+ >>>> +}' | \
tr -d "|"
1. Anna Becker 0170-2233445
2. Anna Fischer 0341-9988776
3. Anna Müller 0987-6543210
4. Ben Meier 0341-5566778
5. Ben Richter 069-3344556
[...]
47. Tim Becker 0151-1112223
48. Tim Richter 030-6677889
49. Tim Schäfer 0711-5566778
50. Tom Bauer 0171-1122334
Nice.
Next things to try are:
1) continue to output first-last name, but sort by last name-first name
2) instead of hard-coding 20, have it determine the longest name in the
input, and adjust the spacing so it always pretty-prints with no >>> name
collision into the phone number column.
The python 12-liner for that is:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2])) >>>
1. Emma Bauer 0157-9988776
2. Tom Bauer 0171-1122334
[...]
49. Clara Zimmermann 040-5566778
50. Leon Zimmermann 0711-1122445
18 line ksh ...
(I would count that as 11 "net lines", ignoring non-functional lines.)
#!/bin/kshA version in GNU Awk (this time sorted on given name for simplicity):
typeset -i MAX=$(
while read FIRST LAST PHONE
do
NAME="${FIRST} ${LAST}"
echo ${#NAME}
done <input.txt |\
sort -nr |\
head -1
)
tr -d \" <input.txt |\
awk -v MAX=${MAX} '{printf("%-*s%s\n", MAX, $1" "$2, $3)}' |\
sort -k2 -k1 |\
nl -w2 -s". "
return 0
Output:
[...]
{ line = gensub (/[^"]*"([^"]*)"[ \t]*"([^"]*)"/, "\\1:\\2", "g")
split (line, data, ":")
if ((len = length (data[1])) > max) max = len
list[data[1]] = data[2]
}
END { PROCINFO["sorted_in"] = "@ind_str_asc"
w = length (NR)
for (d in list)
printf "%*d. %-*s %s\n", w, ++c, max, d, list[d]
}
(with width of the sequence number also computed, not hard-coded,
and single-pass).
Janis
On 3/16/2026 8:09 PM, Richard Harnden wrote:
I split the long line and got the '| \' backwards :(
Second attempt, this time with line numbers ...
$ awk -F\" '{printf("%-20s|%s\n", $2, $4)}' input.txt | \
sort -t\| -k1 | \
awk -F\| 'BEGIN {i=1} {printf("%2d. %-20s|%s\n", i,$1,$2); i++}' | \
tr -d "|"
1. Anna Becker 0170-2233445
2. Anna Fischer 0341-9988776
3. Anna Müller 0987-6543210
4. Ben Meier 0341-5566778
5. Ben Richter 069-3344556
[...]
47. Tim Becker 0151-1112223
48. Tim Richter 030-6677889
49. Tim Schäfer 0711-5566778
50. Tom Bauer 0171-1122334
Nice.
Next things to try are:
1) continue to output first-last name, but sort by last name-first name
2) instead of hard-coding 20, have it determine the longest name in the
input, and adjust the spacing so it always pretty-prints with no name
collision into the phone number column.
The python 12-liner for that is:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2]))
On 2026-03-17 12:47, Bart wrote:
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
Because of the reason you pretend for using it.
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is small
fraction of overall build-time of a library exporting 1000 functions.
For a mere 100 functions, sort time is negligible.
And because of reason here based on arbitrary absolute times instead
of actual algorithmic complexity.
On 3/16/2026 4:57 PM, Scott Lurndal wrote:
DFS <nospam@dfs.com> writes:
On 3/14/2026 8:15 AM, Bart wrote:
On 14/03/2026 06:53, Bonita Montero wrote:
Eh, 35 lines. I forget to remove namePad.
<snip>
Your C++ always looks like total gobbledygook.
Not sure you can fix it - it's just how C++ looks.
Without using or looking back at Montero's code, I did my own research
and replicated this little bit of functionality using recommended C++
style and libraries and functions:
read file in
clean data (remove whitespace, remove quote marks)
sort
Technically, your example removes _leading_ and _trailing_
whitespace, no?
Yes.
$ sed -e 's/^[ \t]*//;s/[ \t]*$//' -e 's/"//' < inputfile | sort
This is what I got from that:
Anna Becker" "0170-2233445"
Anna Fischer" "0341-9988776"
Anna Müller" "0987-6543210"
Ben Meier" "0341-5566778"
Ben Richter" "069-3344556"
...
Not what we're looking for.
On 17/03/2026 12:08, Janis Papanagnou wrote:
On 2026-03-17 12:47, Bart wrote:
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
Because of the reason you pretend for using it.
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is small
fraction of overall build-time of a library exporting 1000 functions.
For a mere 100 functions, sort time is negligible.
And because of reason here based on arbitrary absolute times instead
of actual algorithmic complexity.
No, timing is considered relative to the rest of the task.
If this ever became a bottleneck then it is a trivial upgrade to a
better routine.
[...]
On 2026-03-17 13:37, Bart wrote:Bubble sort is not the worst existing, far from it. It is O(n**2).
On 17/03/2026 12:08, Janis Papanagnou wrote:
On 2026-03-17 12:47, Bart wrote:
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride
it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
Because of the reason you pretend for using it.
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is small
fraction of overall build-time of a library exporting 1000
functions.
For a mere 100 functions, sort time is negligible.
And because of reason here based on arbitrary absolute times
instead of actual algorithmic complexity.
No, timing is considered relative to the rest of the task.
If this ever became a bottleneck then it is a trivial upgrade to a
better routine.
If there are simple better algorithms there's no reason but
ignorance to not use them in the first place! - It's really
stupid to deliberately use inferior, or (as here) even the
worst existing algorithms.
- What do professionals? - Let'sAll that is good, except of use of CDC mainframe in 1980s, which I'd
have a look at a Quicksort implementation on a CDC mainframe
back in the 1980's. They implemented it in a way that scales
according to the CS state-of-the-art; divide-et-impera for
the Quicksort, pick pivot element from a set of three, and
sort divided runs of length smaller than 10 with a straight
insertion sort algorithm. -
No educated computer scientist
would have decided to choose Bubblesort in any place here.
Let alone for your "reason" that this algorithm is "derided".
There's a reason why it's derided; that's called scientific
analysis of its quality.
Janis
[...]
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
[...]
All that is good, except of use of CDC mainframe in 1980s,
which I'd
consider more sub-optimal choice than bubble sort algorithm.
On Wed, 18 Mar 2026 02:40:50 +0100
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> wrote:
On 2026-03-17 13:37, Bart wrote:
On 17/03/2026 12:08, Janis Papanagnou wrote:
On 2026-03-17 12:47, Bart wrote:
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride
it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
Because of the reason you pretend for using it.
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is small
fraction of overall build-time of a library exporting 1000
functions.
For a mere 100 functions, sort time is negligible.
And because of reason here based on arbitrary absolute times
instead of actual algorithmic complexity.
No, timing is considered relative to the rest of the task.
If this ever became a bottleneck then it is a trivial upgrade to a
better routine.
If there are simple better algorithms there's no reason but
ignorance to not use them in the first place! - It's really
stupid to deliberately use inferior, or (as here) even the
worst existing algorithms.
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature, Straight Insertion and Straight Selection, it is not just measurably slower,
but also a little more complicated to code.
However it has one pro point to it: it is very fast when applied to
almost sorted data sets.
On 2026-03-18 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
It is the worst. It's used in academia to show the most primitive
algorithm and compare all others against it (as the lowest bound).
On 2026-03-17 01:49, Tristan Wibberley wrote:
On 16/03/2026 23:07, DFS wrote:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2])) >>>
look more clumsy, a bit like that Python script above.)
On 3/17/2026 12:21 AM, Janis Papanagnou wrote:
On 2026-03-17 01:49, Tristan Wibberley wrote:
On 16/03/2026 23:07, DFS wrote:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2])) >>>>
look more clumsy, a bit like that Python script above.)
You misspelled beautiful.
You can put makeup on most code by using more descriptive names and
lining things up:
import sys
namelist = []
longname = 0
with open(sys.argv[1],'r') as file:
for line in file:
line = line.replace('"','')
elements = [parts.strip() for parts in line.split(' ') if parts]
thisname = len(elements[0]) + len(elements[1])
if thisname > longname: longname = thisname
namelist.append((elements[1], elements[0], elements[2]))
for count, name in enumerate(sorted(namelist)):
print("%2d. %-*s %s" % (count+1, longname+2, name[1] +' '+ name[0], name[2]))
On 18/03/2026 16:40, DFS wrote:
On 3/17/2026 12:21 AM, Janis Papanagnou wrote:
On 2026-03-17 01:49, Tristan Wibberley wrote:
On 16/03/2026 23:07, DFS wrote:
import sys
names = []
longname = 0
with open(sys.argv[1],'r') as f:
for line in f:
line = line.replace('"','')
e = [i.strip() for i in line.split(' ') if i]
ln = len(e[0]) + len(e[1])
if ln > longname: longname = ln
names.append((e[1], e[0], e[2]))
for i,n in enumerate(sorted(names)):
print("%2d. %-*s %s" % (i+1, longname+2, n[1]+' '+n[0], n[2])) >>>>>
look more clumsy, a bit like that Python script above.)
You misspelled beautiful.
You can put makeup on most code by using more descriptive names and
lining things up:
import sys
namelist = []
longname = 0
with open(sys.argv[1],'r') as file:
for line in file:
line = line.replace('"','')
elements = [parts.strip() for parts in line.split(' ') if parts]
thisname = len(elements[0]) + len(elements[1])
if thisname > longname: longname = thisname
namelist.append((elements[1], elements[0], elements[2]))
for count, name in enumerate(sorted(namelist)):
print("%2d. %-*s %s" % (count+1, longname+2, name[1] +' '+
name[0], name[2]))
I don't think that helps!
It's exactly the same, somewhat indigestible lump of code;
the longer names just obscure things more.
Some blank lines wouldn't have gone amiss; those don't contribute to line-count.
However, the approach used could be different. The input is something
like this, bounded by <>:
<..."abc def"..."ghi"...>
the ... represent unwanted white space.
Your first step is to get rid of those ", to end up with:
<...abc def...ghi...>
But now that white space, which had been neatly excluded by the quotes, because part of the content and needs dealing with.
Maybe also, the tel no contain embedded spaces, or maybe the name is
only a first or last name, or there is a middle name. So removing the
quotes also removed the demarcation between name and tel no.
In my scripted version, the 'read' process uses the quotes to delimit
each of the two items; it will be read <abc def> and <ghi>.
I can do something that works with the provided data with util-linux,
sed, awk, and coreutils:
{ sed 's/"//g' | awk '{print $1 " " $2 ":" $3}' | sort -k2 | column -t
-s: | nl -s". " ; } < data
where the file "data" contains the original list. Although there will be
some differences in behaviour for some other data and I think your
python and the above will do unintuitive things with other data.
The pipeline goes as follows:
sed strips quotes
awk makes two delimited columns
sort sorts on surnames, and where surnames are identical on telno
column presents columns with a visual model
nl adds number prefixes
If I really try and use tee and fifos I can use many fewer tools.
None of these are really good languages/toolsets for the task. A DSL
with a data schema would be best.
On 18/03/2026 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only bounded complexity if your random generator is pseduo-random rather than truly random.)
O(n²) worst-case is not uncommon, even amongst sorting algorithms that
are quite smart, like quicksort.
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature, Straight
Insertion and Straight Selection, it is not just measurably slower,
but also a little more complicated to code.
I am not convinced on the complexity point - but it will vary by
programming language, and what you consider "complicated".
[...]
[***] Here Bonita Montero with his enthusiasm for C++ has a point
that cannot be derived.
Janis
[...]
On 18/03/2026 09:49, Janis Papanagnou wrote:
On 2026-03-18 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
It is the worst. It's used in academia to show the most primitive
algorithm and compare all others against it (as the lowest bound).
My use of it is within a compiler that can approach a million lines per second throughput.
Yet when I point out that most other compilers are far slower, even for
the same standard of generated code, then that's perfectly fine!
c:\mx>tim .\mm -time -dll big\fann4x
Compiling big\fann4x.m to big\fann4x.dll
...
-----------------------------
Total: 745 ms 100.0 % # internal compile time
Time: 0.817 # overall
Test input is 740Kloc, 10,000 functions, of which 1,000 are exported to
the DLL. DLL binary is 5.6MB.
Those 1000 function names are sorted via bubble-sort, which will be something over 10ms of total compile-time.
This is the equivalent in C, using gcc:
c:\cx\big>tim gcc -shared fann4x.c -s -o fann4x.dll
Time: 44.067
1000 of the 10000 functions are not static, which here is sufficient to export them from the DLL. DLL binary is 9.6MB.
I like to use the simplest possible algorithms unless there's a pressing reason to use something more elaborate.
On 3/18/2026 1:06 PM, Bart wrote:
It helps a lot!
It goes from above avg python/pseudocode to great python/pseudocode.
It really can't be improved. I'm not even sure it can be shortened and still maintain the same functionality.
Maybe I could combine the .replace() and the .split() to save one line,
but it's not worth it.
elements = [parts.strip() for parts in line.replace('"','').split(' ')
if parts]
Didn't try it.
You can construct fantastic one-liners in python, but like a long regex they're more trouble to write and read than they're worth.
It's exactly the same, somewhat indigestible lump of code;
eh? It's rare that python code is labeled "indigestible".
If it was perl or Montero C++ you'd be correct.
I notice you didn't deliver any Q code. Why not?
Some blank lines wouldn't have gone amiss; those don't contribute to
line-count.
Here I didn't test for blank lines. They didn't exist in the input, and rarely exist in real life.
In my scripted version, the 'read' process uses the quotes to delimit
each of the two items; it will be read <abc def> and <ghi>.
Then you can't sort by last-first, but output by first-last.
Bart <bc@freeuk.com> wrote:
On 18/03/2026 09:49, Janis Papanagnou wrote:
On 2026-03-18 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
It is the worst. It's used in academia to show the most primitive
algorithm and compare all others against it (as the lowest bound).
My use of it is within a compiler that can approach a million lines per
second throughput.
Yet when I point out that most other compilers are far slower, even for
the same standard of generated code, then that's perfectly fine!
c:\mx>tim .\mm -time -dll big\fann4x
Compiling big\fann4x.m to big\fann4x.dll
...
-----------------------------
Total: 745 ms 100.0 % # internal compile time
Time: 0.817 # overall
Test input is 740Kloc, 10,000 functions, of which 1,000 are exported to
the DLL. DLL binary is 5.6MB.
Those 1000 function names are sorted via bubble-sort, which will be
something over 10ms of total compile-time.
This is the equivalent in C, using gcc:
c:\cx\big>tim gcc -shared fann4x.c -s -o fann4x.dll
Time: 44.067
1000 of the 10000 functions are not static, which here is sufficient to
export them from the DLL. DLL binary is 9.6MB.
Well, some day will come Bart^2 and feed your compiler with different
data. One on my codebases is about 210 k wc lines (about 120 k sloc).
There is about 15000 functions there, of which about 8000 is exported.
So, the thing is much smaller than your test file, but has much
more exported functions. If you scale it keeping the same
average function size and proportion of exported functions you
will quickly arrive at cases when this single sort dominates.
FYI, I have an old Modula-2 to C translater (written in late eighties
by a team in Karlsruche). It is reasonably fast handling 10000 lines,
it is hard to exactly measure execution time, but speed is
somewhere between 150 klps and 750 klps (not as fast as your
compiler, but quite respectable IMO).
On smaller files (and possibly
this one) its execution time seem to be dominated by startup time.
But it slows down on bigger files. For curiosity I tracked the
problem and it is in handling of symbol table. While most
things use asymptoticaly fast algorithms parts that looks up
symbols is quadratic and this single thing dominates runtime.
I like to use the simplest possible algorithms unless there's a pressing
reason to use something more elaborate.
Heapsort is only marginaly more complex than bubble sort
On 18/03/2026 10:21, Michael S wrote:I didn't mean something intentionally crippled.
On Wed, 18 Mar 2026 02:40:50 +0100
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> wrote:
On 2026-03-17 13:37, Bart wrote:
On 17/03/2026 12:08, Janis Papanagnou wrote:
On 2026-03-17 12:47, Bart wrote:
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride
it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
Because of the reason you pretend for using it.
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is
small fraction of overall build-time of a library exporting 1000
functions.
For a mere 100 functions, sort time is negligible.
And because of reason here based on arbitrary absolute times
instead of actual algorithmic complexity.
No, timing is considered relative to the rest of the task.
If this ever became a bottleneck then it is a trivial upgrade to a
better routine.
If there are simple better algorithms there's no reason but
ignorance to not use them in the first place! - It's really
stupid to deliberately use inferior, or (as here) even the
worst existing algorithms.
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only
bounded complexity if your random generator is pseduo-random rather
than truly random.)
O(n) worst-case is not uncommon, even amongst sorting algorithms
that are quite smart, like quicksort.
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature,
Straight Insertion and Straight Selection, it is not just
measurably slower, but also a little more complicated to code.
I am not convinced on the complexity point - but it will vary by
programming language, and what you consider "complicated".
However it has one pro point to it: it is very fast when applied to
almost sorted data sets.
It is also stable, has no memory overhead, exchanges pairs of data
in-place, and only ever exchanges adjacent data. These can also be advantages in some situations. (Of course there are alternative
sorting algorithms that have these same advantages and are, at least
usually, more efficient.)
I don't think pure bubblesort has much serious use outside
educational purposes, but it is easy to understand, easy to implement correctly in pretty much any language, and can do a perfectly good
job if you don't need efficient sorting of large datasets (for small
enough datasets, a hand-written bubblesort in C will be faster than
calling qsort).
But I also think people sometimes jump to simplistic views of
efficiency, as though a sorting algorithm can be boiled down to just
a single "O(f(n))" complexity. With enough data, things like cache coherency matter more than operation counts - and with little data, efficiency often doesn't matter at all.
On 2026-03-18 11:20, David Brown wrote:
On 18/03/2026 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only
bounded complexity if your random generator is pseduo-random rather
than truly random.)
Just to make clear; the key point is that it makes no sense to
implement sorting algorithms of complexities worse that O(N²).
With that complexity you can compare and move every element to
be sorted with any other element.[*] So Michael's hypothetical
O(N**3) algorithm would be just nonsense (or ignorance) for
any sensible application in practice.
[*] It would otherwise be like implementing some operations on
linear lists with an algorithm worse than O(N). Depending on
the actual function you usually strive for O(log N) or O(1) by
organizing your data appropriately, but never worse than O(N).
O(n²) worst-case is not uncommon, even amongst sorting algorithms that
are quite smart, like quicksort.
Yes. It's indeed not widely known that one of the [practically]
fastest algorithm is that "bad" (concerning actual complexity).
The point is, for one, that it's on average much better; just
in very special cases it's getting quadratic.[**] And the other
point is that you can manage its complexity by means mentioned
in my previous post (like using a medium-of-three pivot element,
for example). (Quicksort has in that respect been thoroughly
examined already in the 1980's and various optimizations have
been developed.)
[**] As opposed to Bubblesort that shows acceptable behavior
just in the very rare special case of already sorted elements.
But then one should consider to not *sort* the data in the first
place but *insert* a new element at the right place to keep the
sorting order.
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature, Straight
Insertion and Straight Selection, it is not just measurably slower,
but also a little more complicated to code.
I am not convinced on the complexity point - but it will vary by
programming language, and what you consider "complicated".
If you're lucky you use a programming language where you don't
have to implement basic things like the sorting algorithm.[***]
My expectation is that standard library functions would support
sophisticated efficient O(N log N) algorithms out of the box.
[***] Here Bonita Montero with his enthusiasm for C++ has a point
that cannot be derived. I would also not be surprised if qsort(3)
from the standard "C" lib would be based on Quicksort or Quicksort
hybrids (but I've never checked). In C++'s STL you get complexities
of algorithms guaranteed at least; that's sensible CS/IT software
design!
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 18/03/2026 10:21, Michael S wrote:
On Wed, 18 Mar 2026 02:40:50 +0100
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> wrote:
On 2026-03-17 13:37, Bart wrote:
On 17/03/2026 12:08, Janis Papanagnou wrote:
On 2026-03-17 12:47, Bart wrote:
On 17/03/2026 04:38, Janis Papanagnou wrote:
On 2026-03-17 01:29, Bart wrote:
(Also I quite like using bubble sort because so many deride
it!)
How miserable! (I feel so sorry for you.)
Why would that be miserable?
Because of the reason you pretend for using it.
[...]
Bubble-sorting even 1000 strings takes about 10ms, which is
small fraction of overall build-time of a library exporting 1000 >>>>>>> functions.
For a mere 100 functions, sort time is negligible.
And because of reason here based on arbitrary absolute times
instead of actual algorithmic complexity.
No, timing is considered relative to the rest of the task.
If this ever became a bottleneck then it is a trivial upgrade to a
better routine.
If there are simple better algorithms there's no reason but
ignorance to not use them in the first place! - It's really
stupid to deliberately use inferior, or (as here) even the
worst existing algorithms.
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only
bounded complexity if your random generator is pseduo-random rather
than truly random.)
I didn't mean something intentionally crippled.
I remember seeing examples of worse than O(n**2) algorithms that at
first glance look reasonable. I don't remember details.
O(n²) worst-case is not uncommon, even amongst sorting algorithms
that are quite smart, like quicksort.
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature,
Straight Insertion and Straight Selection, it is not just
measurably slower, but also a little more complicated to code.
I am not convinced on the complexity point - but it will vary by
programming language, and what you consider "complicated".
The language is 'C'.
Here are implementations of Straight Insertion and Straight Select.
Show me implementation of Bubble sort that is not at least a little
more complicated.
Remember that it has to be "real" bubble sort, not a simplified bubble
sort that does unnecessary work by starting each time from the
beginning. Your variant shall avoid obviously unnecessary work and
shall be opportunistic, i.e. quick at handling almost sorted cases.
#include <stddef.h>
#include <string.h>
void straight_insertion_sort(const char** buffer, size_t n)
{
for (size_t i = 1; i < n; ++i) {
const char* a = buffer[i];
size_t k;
for (k = i; k != 0; --k) {
if (!(strcmp(a, buffer[k-1]) < 0))
break;
buffer[k] = buffer[k-1];
}
buffer[k] = a;
}
}
void straight_select_sort(const char** buffer, size_t n)
{
for (size_t i = 1; i < n; ++i) {
const char* mn = buffer[i];
size_t mn_k = i;
for (size_t k = i+1; k < n; ++k) {
if (strcmp(buffer[k], mn) < 0) {
mn = buffer[k];
mn_k = k;
}
}
buffer[mn_k] = buffer[i];
buffer[i] = mn;
}
}
However it has one pro point to it: it is very fast when applied to
almost sorted data sets.
It is also stable, has no memory overhead, exchanges pairs of data
in-place, and only ever exchanges adjacent data. These can also be
advantages in some situations. (Of course there are alternative
sorting algorithms that have these same advantages and are, at least
usually, more efficient.)
I don't think pure bubblesort has much serious use outside
educational purposes, but it is easy to understand, easy to implement
correctly in pretty much any language, and can do a perfectly good
job if you don't need efficient sorting of large datasets (for small
enough datasets, a hand-written bubblesort in C will be faster than
calling qsort).
IMHO, Bubble Sort is harder to understand then Straight Select Sort.
But I also think people sometimes jump to simplistic views of
efficiency, as though a sorting algorithm can be boiled down to just
a single "O(f(n))" complexity. With enough data, things like cache
coherency matter more than operation counts - and with little data,
efficiency often doesn't matter at all.
Cache coherency does not matter until you start to parallelize.
Things that even at moderate N could matter more than operation counts
are predictability of branches and locality of of array access.
But it applies only to cases when key comparison is quick and records
are small. When either of two conditions does not hold you are back at operation count (either # of comparisons or # of moves) as a main
bottleneck.
C++'s sorts have the advantage of being template based, and thus can
be more efficient than generic memcpy() and comparison functions for
C's qsort.
On 2026-03-18 11:20, David Brown wrote:
On 18/03/2026 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only bounded
complexity if your random generator is pseduo-random rather than truly
random.)
Just to make clear; the key point is that it makes no sense to
implement sorting algorithms of complexities worse that O(N²).
With that complexity you can compare and move every element to
be sorted with any other element.[*] So Michael's hypothetical
O(N**3) algorithm would be just nonsense (or ignorance) for
any sensible application in practice.
[*] It would otherwise be like implementing some operations on
linear lists with an algorithm worse than O(N). Depending on
the actual function you usually strive for O(log N) or O(1) by
organizing your data appropriately, but never worse than O(N).
On 19/03/2026 10:19, Michael S wrote:
I didn't mean something intentionally crippled.
I remember seeing examples of worse than O(n**2) algorithms that at
first glance look reasonable. I don't remember details.
Now I am curious! Perhaps it was dependent on what you were counting as operations - some algorithms aim to minimise swaps or moves, even if it costs more in comparisons or memory usage. Without having the details
in my head, I could believe that an algorithm that aimed to move each element at most once could require more than O(n²) comparisons.
On Thu, 19 Mar 2026 10:43:08 +0100
David Brown <david.brown@hesbynett.no> wrote:
C++'s sorts have the advantage of being template based, and thus can
be more efficient than generic memcpy() and comparison functions for
C's qsort.
The problem with C qsort is not just lack of efficiency. It's also
lacke of flexibilty at API level. In my pratice, in about 30% of the
cases sorting, C qsort will either didn't do what I want at all or at
very least would not do it in thread-safe manner. qsort_r() (POSIX ?)
is a more flexible API that should have been part of C Standard at least since C11. But it still is not.
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature,
Straight Insertion and Straight Selection, it is not just
measurably slower, but also a little more complicated to code.
I am not convinced on the complexity point - but it will vary by
programming language, and what you consider "complicated".
The language is 'C'.
Here are implementations of Straight Insertion and Straight Select.
Show me implementation of Bubble sort that is not at least a little
more complicated.
Remember that it has to be "real" bubble sort, not a simplified bubble
sort that does unnecessary work by starting each time from the
beginning. Your variant shall avoid obviously unnecessary work and
shall be opportunistic, i.e. quick at handling almost sorted cases.
#include <stddef.h>
#include <string.h>
void straight_insertion_sort(const char** buffer, size_t n)
{
for (size_t i = 1; i < n; ++i) {
const char* a = buffer[i];
size_t k;
for (k = i; k != 0; --k) {
if (!(strcmp(a, buffer[k-1]) < 0))
break;
buffer[k] = buffer[k-1];
}
buffer[k] = a;
}
}
void straight_select_sort(const char** buffer, size_t n)
{
for (size_t i = 1; i < n; ++i) {
const char* mn = buffer[i];
size_t mn_k = i;
for (size_t k = i+1; k < n; ++k) {
if (strcmp(buffer[k], mn) < 0) {
mn = buffer[k];
mn_k = k;
}
}
buffer[mn_k] = buffer[i];
buffer[i] = mn;
}
}
On 19/03/2026 11:23, Michael S wrote:
On Thu, 19 Mar 2026 10:43:08 +0100
David Brown <david.brown@hesbynett.no> wrote:
C++'s sorts have the advantage of being template based, and thus can
be more efficient than generic memcpy() and comparison functions for
C's qsort.
The problem with C qsort is not just lack of efficiency. It's also
lacke of flexibilty at API level. In my pratice, in about 30% of the
cases sorting, C qsort will either didn't do what I want at all or at
very least would not do it in thread-safe manner. qsort_r() (POSIX ?)
is a more flexible API that should have been part of C Standard at least
since C11. But it still is not.
Yes, standard C qsort() has a lot of limitations. A thread-safe version >would fix one of these, but miss out on many others.
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
The problem with bubble sort is that relatively to other popular
O(n**2) sorting algorithms, i.e. using Knuth's nomenclature,
Straight Insertion and Straight Selection, it is not just
measurably slower, but also a little more complicated to code.
I am not convinced on the complexity point - but it will vary by
programming language, and what you consider "complicated".
The language is 'C'.
Here are implementations of Straight Insertion and Straight Select.
Show me implementation of Bubble sort that is not at least a little
more complicated.
Remember that it has to be "real" bubble sort, not a simplified
bubble sort that does unnecessary work by starting each time from
the beginning. Your variant shall avoid obviously unnecessary work
and shall be opportunistic, i.e. quick at handling almost sorted
cases.
#include <stddef.h>
#include <string.h>
void straight_insertion_sort(const char** buffer, size_t n)
{
for (size_t i = 1; i < n; ++i) {
const char* a = buffer[i];
size_t k;
for (k = i; k != 0; --k) {
if (!(strcmp(a, buffer[k-1]) < 0))
break;
buffer[k] = buffer[k-1];
}
buffer[k] = a;
}
}
void straight_select_sort(const char** buffer, size_t n)
{
for (size_t i = 1; i < n; ++i) {
I think this needs to start from 0.
const char* mn = buffer[i];
size_t mn_k = i;
for (size_t k = i+1; k < n; ++k) {
if (strcmp(buffer[k], mn) < 0) {
mn = buffer[k];
mn_k = k;
}
}
buffer[mn_k] = buffer[i];
buffer[i] = mn;
}
}
I ported both of these to scripting code, then compared against the
simplest version of bubble-sort.
They were faster, by 2.7x and 3.6x respectively. But both still had O(n-squared) behaviour.
Now that I have copies, I may possible use them next time I might
have written a bubble-sort. But I would only have done that anyway
when I knew its performance was tolerable or negligible.
Normally however (and again in scripting code) I'd use my built-in
sort() based on quicksort, which is nearly 1000 times faster than bubble-sort for my test (sort 20K random strings), and some 300x
faster than your routines. It's not O(n-squared) either.
So basically, if I don't care about speed or its not needed, I might
as well use bubble-sort.
The problem with C qsort is not just lack of efficiency. It's also
lacke of flexibilty at API level. In my pratice, in about 30% of the
cases sorting, C qsort will either didn't do what I want at all or at
very least would not do it in thread-safe manner. qsort_r() (POSIX ?)
is a more flexible API that should have been part of C Standard at least since C11. But it still is not.
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Normally however (and again in scripting code) I'd use my built-in
sort() based on quicksort, which is nearly 1000 times faster than bubble-sort for my test (sort 20K random strings), and some 300x
faster than your routines. It's not O(n-squared) either.
Am 19.03.2026 um 11:23 schrieb Michael S:
The problem with C qsort is not just lack of efficiency. It's also
lacke of flexibilty at API level. In my pratice, in about 30% of the
cases sorting, C qsort will either didn't do what I want at all or
at very least would not do it in thread-safe manner. qsort_r()
(POSIX ?) is a more flexible API that should have been part of C
Standard at least since C11. But it still is not.
If you're not allowed to use global variables you can use thread
-local variables. These also have fixed addresses. So at the end
qsort() is thread-safe.
On Thu, 19 Mar 2026 14:49:16 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Normally however (and again in scripting code) I'd use my built-in
sort() based on quicksort, which is nearly 1000 times faster than
bubble-sort for my test (sort 20K random strings), and some 300x
faster than your routines. It's not O(n-squared) either.
For lexicographic sort of 20K random strings, plain quicksort is
probably quite sub-optimal.
If performance is important, I'd consider combined method: first
pre-sort by 3-char or 4-char prefix with Radix sort ('by LS character
first' variation of algorithm), then use quicksprt to sort sections
with the same prefix. For string taken from the real world it will not
work as well as for artificial random strings, but should still
significantly outperform plain quicksort.
On 19/03/2026 15:29, Michael S wrote:
On Thu, 19 Mar 2026 14:49:16 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Normally however (and again in scripting code) I'd use my built-in
sort() based on quicksort, which is nearly 1000 times faster than
bubble-sort for my test (sort 20K random strings), and some 300x
faster than your routines. It's not O(n-squared) either.
For lexicographic sort of 20K random strings, plain quicksort is
probably quite sub-optimal.
If performance is important, I'd consider combined method: first
pre-sort by 3-char or 4-char prefix with Radix sort ('by LS
character first' variation of algorithm), then use quicksprt to
sort sections with the same prefix. For string taken from the real
world it will not work as well as for artificial random strings,
but should still significantly outperform plain quicksort.
What do you think the slow-down would be? I set up another test,
sorting 1M random strings each exactly 16 characters long.
This is how long it took to sort via various means:
WSL shell 'sort': 2.3/3.5 seconds (real/user, from/to a file)
Windows 'sort': 4.2 seconds (from/to a file)
C's qsort: 0.5 seconds (gcc; initialised char*[]/inplace)
0.6 seconds (tcc)
My script lang: 2.3/2.8 seconds (sort only/all, file to in-memory)
(That last timing is somewhat remarkable given (1) that the sort
routine itself runs as interpreted, dynamic bytecode; (2) each string
compare involves calling C's 'strcmp' /after/ converting args to
ensure strings are zero-terminated.)
So how much faster ought it to be?
I've lost track of what the actual requirements meanwhile are.
Double-quoted names and numbers are the payload to extract?
On 3/16/2026 8:49 PM, Tristan Wibberley wrote:
I can do something that works with the provided data with util-linux,
sed, awk, and coreutils:
{ sed 's/"//g' | awk '{print $1 " " $2 ":" $3}' | sort -k2 | column -t
-s: | nl -s". " ; } < data
where the file "data" contains the original list. Although there will be
some differences in behaviour for some other data and I think your
python and the above will do unintuitive things with other data.
Emma Bauer:0157-9988776
Tom Bauer:0171-1122334
Tim Becker:0151-1112223
Anna Becker:0170-2233445
Marie Becker:040-4455667
Michael Braun:0170-9988776
Sarah Braun:040-7788990
You need numbering, and the names should remain together, and the phones left-aligned.
1. Emma Bauer 0157-9988776
2. Tom Bauer 0171-1122334
3. Tim Becker 0151-1112223
4. Anna Becker 0170-2233445
5. Marie Becker 040-4455667
6. Michael Braun 0170-9988776
7. Sarah Braun 040-7788990
On 19/03/2026 15:29, Michael S wrote:
On Thu, 19 Mar 2026 14:49:16 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Normally however (and again in scripting code) I'd use my built-in
sort() based on quicksort, which is nearly 1000 times faster than
bubble-sort for my test (sort 20K random strings), and some 300x
faster than your routines. It's not O(n-squared) either.
For lexicographic sort of 20K random strings, plain quicksort is
probably quite sub-optimal.
If performance is important, I'd consider combined method: first
pre-sort by 3-char or 4-char prefix with Radix sort ('by LS character
first' variation of algorithm), then use quicksprt to sort sections
with the same prefix. For string taken from the real world it will not
work as well as for artificial random strings, but should still
significantly outperform plain quicksort.
What do you think the slow-down would be? I set up another test, sorting
1M random strings each exactly 16 characters long.
This is how long it took to sort via various means:
WSL shell 'sort': 2.3/3.5 seconds (real/user, from/to a file)
Windows 'sort': 4.2 seconds (from/to a file)
C's qsort: 0.5 seconds (gcc; initialised char*[]/inplace)
0.6 seconds (tcc)
My script lang: 2.3/2.8 seconds (sort only/all, file to in-memory)
(That last timing is somewhat remarkable given (1) that the sort routine itself runs as interpreted, dynamic bytecode; (2) each string compare involves calling C's 'strcmp' /after/ converting args to ensure strings
are zero-terminated.)
So how much faster ought it to be?
On Thu, 19 Mar 2026 18:33:13 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 15:29, Michael S wrote:
On Thu, 19 Mar 2026 14:49:16 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Normally however (and again in scripting code) I'd use my built-in
sort() based on quicksort, which is nearly 1000 times faster than
bubble-sort for my test (sort 20K random strings), and some 300x
faster than your routines. It's not O(n-squared) either.
For lexicographic sort of 20K random strings, plain quicksort is
probably quite sub-optimal.
If performance is important, I'd consider combined method: first
pre-sort by 3-char or 4-char prefix with Radix sort ('by LS
character first' variation of algorithm), then use quicksprt to
sort sections with the same prefix. For string taken from the real
world it will not work as well as for artificial random strings,
but should still significantly outperform plain quicksort.
What do you think the slow-down would be? I set up another test,
sorting 1M random strings each exactly 16 characters long.
This is how long it took to sort via various means:
WSL shell 'sort': 2.3/3.5 seconds (real/user, from/to a file)
Windows 'sort': 4.2 seconds (from/to a file)
C's qsort: 0.5 seconds (gcc; initialised char*[]/inplace)
0.6 seconds (tcc)
My script lang: 2.3/2.8 seconds (sort only/all, file to in-memory)
(That last timing is somewhat remarkable given (1) that the sort
routine itself runs as interpreted, dynamic bytecode; (2) each string
compare involves calling C's 'strcmp' /after/ converting args to
ensure strings are zero-terminated.)
So how much faster ought it to be?
I don't understand the question. What answer could there possibly be
except for "There are no limits to perfection!" ?
That routine is given below (not my algorithm; I adapted it long ago).
-----------------------
void isort(char** data, int ll, int rr) {
char* temp;
int i = ll, j = rr;
char* pivot = data[(ll + rr) / 2];
do {
while (strcmp(pivot, data[i]) > 0 && i < rr) ++i;
while (strcmp(pivot, data[j]) < 0 && j > ll) --j;
if (i <= j) {
temp = data[i]; data[i] = data[j]; data[j] = temp;
++i;
--j;
}
} while (i <= j);
if (ll < j) isort(data, ll, j);
if (i < rr) isort(data, i, rr);
}
//For a char* array A of N elements, call as 'isort(A, 0, n-1)'.
Am 16.03.2026 um 21:43 schrieb DFS:
Even with the -Os compile flag you mentioned, the executable is 101MB....
Linux / clang++: ~190kiB.
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> wrote:
On 2026-03-18 11:20, David Brown wrote:
On 18/03/2026 10:21, Michael S wrote:
Bubble sort is not the worst existing, far from it. It is O(n**2).
Some algos, which names I don't remember are O(n**3).
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only bounded >>> complexity if your random generator is pseduo-random rather than truly
random.)
Just to make clear; the key point is that it makes no sense to
implement sorting algorithms of complexities worse that O(N²).
Well, in Prolog it is natural to implement sort in a way that
is equvalent to recursive search of ordering permuations.
And when you are sorting say 5 element lists it can work
quite well.
With that complexity you can compare and move every element to
be sorted with any other element.[*] So Michael's hypothetical
O(N**3) algorithm would be just nonsense (or ignorance) for
any sensible application in practice.
You assume O(1) access to elements. Take any O(N^2) array
sort and replace array by a list. Now your O(N^2) suddenly
becomes O(N^3).
Note that this can easily happen in dynamicaly
typed languages with true lists: such languages tend to have
"element access" operation which is O(1) for arrays but O(N)
for lists. It can happen in C if you write a "generic" sort
which takes accessors as arguments (eiter function pointer
arguments or via appropriate macrology) and plug in list
operations as arguemnts.
[*] It would otherwise be like implementing some operations on
linear lists with an algorithm worse than O(N). Depending on
the actual function you usually strive for O(log N) or O(1) by
organizing your data appropriately, but never worse than O(N).
You know about lists, but somewhat ignore possibility of
combining lists with O(N^2) sort.
Yes, that's fair enough. You have to go out of your way to make a
sorting algorithm that is worse than O(n²), so they will not be found in real code. But they certainly do exist. [...]
[...] But sometimes that is not the fastest tool for the job.
I've had occasion to need to sort arrays of numbers (ints or floats)
with a compile-time fixed small size - such as 4 or 6 entries. While I
did not use bubblesort, a bubblesort would have beaten standard C
library qsort by an order of magnitude.
The fun of sorting is that there is no single perfect algorithm that is always the best choice.
C++'s sorts have the advantage of being template based, and thus can be
more efficient than generic memcpy() and comparison functions for C's qsort. Usually, at least for larger datasets, sorts are hybrid
algorithms, switching between things like quicksort, insertion sort and
heap sort at different stages in order to maximise cache hits and to get
the balance between the "O" complexity and the constants in the O function. [...]
David Brown <david.brown@hesbynett.no> writes:
On 19/03/2026 11:23, Michael S wrote:
On Thu, 19 Mar 2026 10:43:08 +0100
David Brown <david.brown@hesbynett.no> wrote:
C++'s sorts have the advantage of being template based, and thus can
be more efficient than generic memcpy() and comparison functions for
C's qsort.
The problem with C qsort is not just lack of efficiency. It's also
lacke of flexibilty at API level. [...]
On the other hand, qsort() works just fine for relatively small
data sets which I suspect is the common use for qsort (aside from introductory programming exercises).
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only
bounded complexity if your random generator is pseduo-random rather
than truly random.)
I didn't mean something intentionally crippled.
I remember seeing examples of worse than O(n**2) algorithms that at
first glance look reasonable. I don't remember details.
[...]
The language is 'C'.
Here are implementations of Straight Insertion and Straight Select.
Show me implementation of Bubble sort that is not at least a little
more complicated.
Remember that it has to be "real" bubble sort, not a simplified bubble
sort that does unnecessary work by starting each time from the
beginning. [...]
[...]
On 19/03/2026 19:40, Michael S wrote:
[...]
You said that plain quicksort (I guess that is what I used) is likely 'sub-optimal' and that your complex approach will 'significantly
outperform' it.
So I'm just asking 'by how much'?
[...]
That seems pretty crazy itself, what causes it to be more than 1kiB?
template instantiations not in a .so even though they're really really common?
How does one fix it without U/B due to not having the same definition of
an explicit instantiation throughout, i.e. via toolchain direction to
drop instantiations at link time because they're already available in a .so?
Ah, I now see you know all that already, so I could have spared some
writing. :-)
Quicksort (as opposed to qsort() - where the details are not[...]
obvious) is certainly a case for computer science lectures;
I'd only have hoped that these CS basics are broadly known,
also with non-CS educated or the many amateur programmers.
On 2026-03-20 00:53, Bart wrote:
On 19/03/2026 19:40, Michael S wrote:
[...]
You said that plain quicksort (I guess that is what I used) is
likely 'sub-optimal' and that your complex approach will
'significantly outperform' it.
I don't recall a statement "[quicksort] is *likely* 'sub-optimal'"
(emphasis by me); but if that has been said - and if the algorithm
was meant - it's of course nonsense.
Quicksort is, for significantly large data sets, more likely faster
even in its basic form if compared other plain O(N log N) algorithms
and especially if compared to any O(N^2) algorithms.
Even if it
exhibits in rare corner cases O(N^2) it typically outperforms other algorithms of O(N log N) class in practice on average random data.
So I'm just asking 'by how much'?
Concrete numbers are secondary here, they hide the basic
comprehension. The complexity classes show how the algorithms scale
with increasing data sets (there's various complexity measures, BTW,
with other bound criteria; which may be of interest to not get
repelled by the O(N^2) "threat" of Quicksort's worst case bound in
its primitive form).
Janis
Anecdotally; Google is known to ask in job interviews about complexity
of algorithms knowledge. A friend of mine who applied for a job had
been specifically asked about the big-O complexity of Quicksort.
[...]
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> writes:
[...]
Quicksort (as opposed to qsort() - where the details are not[...]
obvious) is certainly a case for computer science lectures;
I'd only have hoped that these CS basics are broadly known,
also with non-CS educated or the many amateur programmers.
C doesn't specify which algorithm qsort() uses. The name is almost
certainly derived from the name of the Quicksort algorithm, but the
C standard only says that the contents of the array are sorted.
A conforming but perverse implemention could use Bubblesort or
Bogosort.
Even the GNU libc documentation doesn't say what algorithm that implementation uses. (In the source code, I see references to
mergesort and heapsort.)
On 20/03/2026 04:01, Janis Papanagnou wrote:
Ah, I now see you know all that already, so I could have spared some
writing. :-)
It's still nice to see we are on the same page. We all have daft ideas
or unexpected misunderstandings at times - things we've "always known"
that are actually completely wrong. So it's nice to get conformation
that others, thinking and writing independently, reach the same
conclusions.
(I'd expect that all the regulars here will know these things about
sorting algorithms, but there's always a chance that someone learns something from the posts.)
On Fri, 20 Mar 2026 05:05:57 +0100
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> wrote:
On 2026-03-20 00:53, Bart wrote:
On 19/03/2026 19:40, Michael S wrote:
[...]
You said that plain quicksort (I guess that is what I used) is
likely 'sub-optimal' and that your complex approach will
'significantly outperform' it.
I don't recall a statement "[quicksort] is *likely* 'sub-optimal'"
(emphasis by me); but if that has been said - and if the algorithm
was meant - it's of course nonsense.
Your ignorance shows.
Quicksort is, for significantly large data sets, more likely faster
even in its basic form if compared other plain O(N log N) algorithms
and especially if compared to any O(N^2) algorithms.
Even if it
exhibits in rare corner cases O(N^2) it typically outperforms other
algorithms of O(N log N) class in practice on average random data.
Correct.
On 19/03/2026 19:40, Michael S wrote:
On Thu, 19 Mar 2026 18:33:13 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 15:29, Michael S wrote:
On Thu, 19 Mar 2026 14:49:16 +0000
Bart <bc@freeuk.com> wrote:
On 19/03/2026 09:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Normally however (and again in scripting code) I'd use my
built-in sort() based on quicksort, which is nearly 1000 times
faster than bubble-sort for my test (sort 20K random strings),
and some 300x faster than your routines. It's not O(n-squared)
either.
For lexicographic sort of 20K random strings, plain quicksort is
probably quite sub-optimal.
If performance is important, I'd consider combined method: first
pre-sort by 3-char or 4-char prefix with Radix sort ('by LS
character first' variation of algorithm), then use quicksprt to
sort sections with the same prefix. For string taken from the real
world it will not work as well as for artificial random strings,
but should still significantly outperform plain quicksort.
What do you think the slow-down would be? I set up another test,
sorting 1M random strings each exactly 16 characters long.
This is how long it took to sort via various means:
WSL shell 'sort': 2.3/3.5 seconds (real/user, from/to a file)
Windows 'sort': 4.2 seconds (from/to a file)
C's qsort: 0.5 seconds (gcc; initialised
char*[]/inplace) 0.6 seconds (tcc)
My script lang: 2.3/2.8 seconds (sort only/all, file to
in-memory)
(That last timing is somewhat remarkable given (1) that the sort
routine itself runs as interpreted, dynamic bytecode; (2) each
string compare involves calling C's 'strcmp' /after/ converting
args to ensure strings are zero-terminated.)
So how much faster ought it to be?
I don't understand the question. What answer could there possibly be
except for "There are no limits to perfection!" ?
You said that plain quicksort (I guess that is what I used) is likely 'sub-optimal' and that your complex approach will 'significantly
outperform' it.
So I'm just asking 'by how much'?
My figures suggested it was fast enough. However I don't know what
kind of sort routines are used in that table except for mine, which
is not native code.
So I ported my sort to C, but the figures are pretty much the same as
C's qsort(): 0.5 seconds for both tcc/gcc, even though it uses
dedicated compare code.
That routine is given below (not my algorithm; I adapted it long ago).
That it is 5-8 times as fast as shell methods of sorting (even
allowing for those to load and write files) seems good enough for me.
However, in my programs, I would look askance at anything that
required such a sorting step anyway. It's something I try and avoid.
-----------------------
void isort(char** data, int ll, int rr) {
char* temp;
int i = ll, j = rr;
char* pivot = data[(ll + rr) / 2];
do {
while (strcmp(pivot, data[i]) > 0 && i < rr) ++i;
while (strcmp(pivot, data[j]) < 0 && j > ll) --j;
if (i <= j) {
temp = data[i]; data[i] = data[j]; data[j] = temp;
++i;
--j;
}
} while (i <= j);
if (ll < j) isort(data, ll, j);
if (i < rr) isort(data, i, rr);
}
//For a char* array A of N elements, call as 'isort(A, 0, n-1)'.
But both are O(N log N), which is good to know. (And, frankly,
I wouldn't have expected any worse O(N^2) algorithm here.)
[*] C++/STL has at least guarantees for the complexities.
For me that would basically suffice. I don't necessarily
need to know whether it's the concrete algorithm A or B.
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1). Which
does not hold in the general case of lexicographic sort.
Am 20.03.2026 um 02:33 schrieb Tristan Wibberley:
That seems pretty crazy itself, what causes it to be more than 1kiB?
template instantiations not in a .so even though they're really really
common?
How does one fix it without U/B due to not having the same definition of
an explicit instantiation throughout, i.e. via toolchain direction to
drop instantiations at link time because they're already available in
a .so?
Where's the problem ?
On 2026-03-19 10:19, Michael S wrote:
On Wed, 18 Mar 2026 11:20:03 +0100
David Brown <david.brown@hesbynett.no> wrote:
Bogosort is O(n * n!) on average - keep rearranging the elements at
random, then check if they are sorted. (The worst case is only
bounded complexity if your random generator is pseduo-random rather
than truly random.)
I didn't mean something intentionally crippled.
I remember seeing examples of worse than O(n**2) algorithms that at
first glance look reasonable. I don't remember details.
Then it's best to abandon imagined examples. (Or search for it
and then report about them so that we have something substantial
to talk about.)
[...]
The language is 'C'.
Here are implementations of Straight Insertion and Straight Select.
Show me implementation of Bubble sort that is not at least a little
more complicated.
I don't know about you, but Bubblesort is trivial, and the other
two O(N^2) methods you mention are close to trivial. Certainly
they play in the same "league". Compare these algorithms to the
other sophisticated algorithms whose principles usually cannot be
*obviously* understood (e.g. Quicksort, Shellsort, even Heapsort.
(There's a point (but negligible) if some other poster previously
said that it's easier for him to program Bubblesort than something
even slightly more sophisticated.)
Of course you can tweak any algorithm to make it better. But if
you're starting with a bad choice of an algorithm you won't fix
the inherent issues.
Remember that it has to be "real" bubble sort, not a simplified
bubble sort that does unnecessary work by starting each time from
the beginning. [...]
(There's Bubblesort. There's not "real" Bubblesort. Such phrases
neither explain anything nor are they helpful for discussions.)
Janis
[...]
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1). Which
does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
Am 20.03.2026 um 12:38 schrieb Janis Papanagnou:
But both are O(N log N), which is good to know. (And, frankly,
I wouldn't have expected any worse O(N^2) algorithm here.)
Mergesort is always N(log N).
Database server's don't use
quicksort since it doesn't perform well it the items being
sorted have a variable size; so they use quicksort,
also
because of it has very linear accesses, which is good for
I/O.
[*] C++/STL has at least guarantees for the complexities.
For me that would basically suffice. I don't necessarily
need to know whether it's the concrete algorithm A or B.
You're too compulsive.
On 20/03/2026 04:01, Janis Papanagnou wrote:
Ah, I now see you know all that already, so I could have spared some writing. :-)
It's still nice to see we are on the same page. We all have daft
ideas or unexpected misunderstandings at times - things we've "always
known" that are actually completely wrong. So it's nice to get
conformation that others, thinking and writing independently, reach
the same conclusions.
(I'd expect that all the regulars here will know these things about
sorting algorithms, but there's always a chance that someone learns something from the posts.)
On 20/03/2026 13:13, Janis Papanagnou wrote:
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1).
Which does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
That's all true.
If you have a situation where swaps are particularly costly, or
comparisons are particularly costly, or other factors (such as memory
usage) are particularly costly, then you might have to be more
nuanced in the complexity measurements you use when comparing
algorithms. You might find that an algorithm that is O(n) in
comparisons but O(n) in swaps is better for that purpose than one
that is O(n.log n) in both.
And don't forget that the constant factors in the O can be relevant.
There's a known algorithm for multiplication of big numbers that is
O(n.log n), which is (probably) the optimal order of complexity for
the task. But the constant factors mean it is only better than other algorithms once you are using truly absurdly big numbers (so that you
are saving a few minutes in your million-year calculations).
Sometimes a single O number is not nearly enough to tell you what you
need to know in comparisons between algorithms.
On Fri, 20 Mar 2026 13:26:34 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 13:13, Janis Papanagnou wrote:
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1).
Which does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
That's all true.
No, it is not. That's all nonsense that demonstrates a shallow thinking. Comparison method one uses in lexicographic sort is very much
variable part of algorithm rather than something fixed.
Many good methods start sorting by using just few (sometimes one)
leading characters and only in later passes that typically operate on
much much shorter sections, they use full string comparison.
On Fri, 20 Mar 2026 13:26:34 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 13:13, Janis Papanagnou wrote:
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1).
Which does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
That's all true.
No, it is not. That's all nonsense that demonstrates a shallow thinking. Comparison method one uses in lexicographic sort is very much
variable part of algorithm rather than something fixed.
Many good methods start sorting by using just few (sometimes one)
leading characters and only in later passes that typically operate on
much much shorter sections, they use full string comparison.
One example where doing it smart is particularly beneficial is a sort
at core of Burrows-Wheeler Transform.
If you have a situation where swaps are particularly costly, or
comparisons are particularly costly, or other factors (such as memory
usage) are particularly costly, then you might have to be more
nuanced in the complexity measurements you use when comparing
algorithms. You might find that an algorithm that is O(n²) in
comparisons but O(n) in swaps is better for that purpose than one
that is O(n.log n) in both.
And don't forget that the constant factors in the O can be relevant.
There's a known algorithm for multiplication of big numbers that is
O(n.log n), which is (probably) the optimal order of complexity for
the task. But the constant factors mean it is only better than other
algorithms once you are using truly absurdly big numbers (so that you
are saving a few minutes in your million-year calculations).
Sometimes a single O number is not nearly enough to tell you what you
need to know in comparisons between algorithms.
On 20/03/2026 13:08, Michael S wrote:
On Fri, 20 Mar 2026 13:26:34 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 13:13, Janis Papanagnou wrote:
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1).
Which does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
That's all true.
No, it is not. That's all nonsense that demonstrates a shallow
thinking. Comparison method one uses in lexicographic sort is very
much variable part of algorithm rather than something fixed.
Many good methods start sorting by using just few (sometimes one)
leading characters and only in later passes that typically operate
on much much shorter sections, they use full string comparison.
I just tried something like that with bubble-sort: I split the data
into 26 arrays each only containing strings that start with 'a', 'b',
'c' and so on.
The arrays were sorted separately then concatenated (so no longer
in-place unless I take the extra step of overwriting the original).
Sorting 20K random strings reduced from 25 seconds to 0.8 seconds (interpreted code).
Of course, being random strings (and also all lower case), there was
a near-perfect distribution of strings starting with each letter!
It also requires extra storage and copying.
On 2026-03-20 10:14, Keith Thompson wrote:
Janis Papanagnou <janis_papanagnou+ng@hotmail.com> writes:
[...]
Quicksort (as opposed to qsort() - where the details are not[...]
obvious) is certainly a case for computer science lectures;
I'd only have hoped that these CS basics are broadly known,
also with non-CS educated or the many amateur programmers.
C doesn't specify which algorithm qsort() uses. The name is almost
certainly derived from the name of the Quicksort algorithm, but the
C standard only says that the contents of the array are sorted.
A conforming but perverse implemention could use Bubblesort or
Bogosort.
Yes, there's no [obvious] information; that's the dilemma![*]
Even the GNU libc documentation doesn't say what algorithm that
implementation uses. (In the source code, I see references to
mergesort and heapsort.)
Though the use of Mergesort is somewhat irritating (to me).
But both are O(N log N), which is good to know. (And, frankly,
I wouldn't have expected any worse O(N^2) algorithm here.)
Janis
[*] C++/STL has at least guarantees for the complexities.
For me that would basically suffice. I don't necessarily need
to know whether it's the concrete algorithm A or B.
(I'd expect that all the regulars here will know these things about
sorting algorithms, but there's always a chance that someone learns something from the posts.)
[...]
I don't see a dilemma. qsort() sorts. No serious implementation is
going to use an algorithm with worse than O(n log n) performance.
Even the GNU libc documentation doesn't say what algorithm that
implementation uses. (In the source code, I see references to
mergesort and heapsort.)
Though the use of Mergesort is somewhat irritating (to me).
But both are O(N log N), which is good to know. (And, frankly,
I wouldn't have expected any worse O(N^2) algorithm here.)
[...]
[*] C++/STL has at least guarantees for the complexities.
For me that would basically suffice. I don't necessarily need
to know whether it's the concrete algorithm A or B.
You effectively have the same guarantee for qsort(), even though it's
not spelled out in the standard.
Sometimes listening to you clc guys is like listening to a room full of
CS professors (which I suspect some of you are or were).
On 2026-03-20 22:10, DFS wrote:
Sometimes listening to you clc guys is like listening to a room full
of CS professors (which I suspect some of you are or were).
CS professors sitting in their ivory tower and without practical
experiences, and programmers without a substantial CS background;
both of these extreme characters can be problematic (if fanatic).
Many folks here seem to have a good mix of necessary practicalI've definitely seen Bart take some heat for continuing to post code in
IT, Project, and CS knowledge and proficiencies, as I'd value it.
And a clear mind to discuss topics. Sometimes it gets heated and
personal, though; certainly nothing to foster.
On 3/12/2026 2:24 AM, Bonita Montero wrote:
There was a programming-"contest" on comp.lang.c and I wanted to show
the simpler code in C, here it is:
[C++ code]
C++ really rocks since you've to deal with much less details than in C.
Is your strategy to just ignore reality, and keep making bogus claims
that - for this challenge at least - you can't support?
On 3/20/2026 3:35 AM, David Brown wrote:
(I'd expect that all the regulars here will know these things about
sorting algorithms, but there's always a chance that someone learns
something from the posts.)
This.
Sometimes listening to you clc guys is like listening to a room full of
CS professors (which I suspect some of you are or were).
On 3/20/2026 9:53 PM, Janis Papanagnou wrote:
On 2026-03-20 22:10, DFS wrote:
Sometimes listening to you clc guys is like listening to a room full
of CS professors (which I suspect some of you are or were).
CS professors sitting in their ivory tower and without practical
experiences, and programmers without a substantial CS background;
both of these extreme characters can be problematic (if fanatic).
It was meant as a compliment. Plenty of CS professors have good
practical and industry experience, too, which you'll see on their bios
and cv's.
It's got to be tough to find good CS teachers that stick around, given
they can probably make much more money in private industry.
Many folks here seem to have a good mix of necessary practicalI've definitely seen Bart take some heat for continuing to post code in
IT, Project, and CS knowledge and proficiencies, as I'd value it.
And a clear mind to discuss topics. Sometimes it gets heated and
personal, though; certainly nothing to foster.
his scripting language, rather than C
On 20/03/2026 14:08, Michael S wrote:
On Fri, 20 Mar 2026 13:26:34 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 13:13, Janis Papanagnou wrote:
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1).
Which does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
That's all true.
No, it is not. That's all nonsense that demonstrates a shallow
thinking. Comparison method one uses in lexicographic sort is very
much variable part of algorithm rather than something fixed.
Many good methods start sorting by using just few (sometimes one)
leading characters and only in later passes that typically operate
on much much shorter sections, they use full string comparison.
One example where doing it smart is particularly beneficial is a
sort at core of Burrows-Wheeler Transform.
Yes, I realise that various kinds of radix or bucket sorts are often
good for large data sets, either in whole or as the first steps. But
I am not clear about how it contradicts what Janis has been saying -
I feel the two of you are talking somewhat past each other.
The cost
of doing a comparison does not affect the complexity of an algorithm
- but it can certainly affect the best choice of algorithm for the
task in hand.
On Fri, 20 Mar 2026 14:47:18 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 14:08, Michael S wrote:
On Fri, 20 Mar 2026 13:26:34 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 13:13, Janis Papanagnou wrote:
On 2026-03-20 11:58, Michael S wrote:
[...]
BTW, quicksort is O(N*logN) only as long as comparison is O(1).
Which does not hold in the general case of lexicographic sort.
You have to differentiate the complexity of an _algorithm_ here;
we talk about how many comparisons and how many data swaps are
necessary.
If your _comparisons_ are costly, or your _data swaps_ are costly,
we don't blame the algorithm. If you happen to have a comparison
function of some O(X) the _sorting algorithm_ is still of its own
inherent algorithmic complexity. If you happen to swap BLOB data
by copying every byte (instead of sorting an index, for example)
you also cannot blame the sorting algorithm, its complexity still
holds.
If it were different any complexity measure for algorithms would
be meaningless.
That's all true.
No, it is not. That's all nonsense that demonstrates a shallow
thinking. Comparison method one uses in lexicographic sort is very
much variable part of algorithm rather than something fixed.
Many good methods start sorting by using just few (sometimes one)
leading characters and only in later passes that typically operate
on much much shorter sections, they use full string comparison.
One example where doing it smart is particularly beneficial is a
sort at core of Burrows-Wheeler Transform.
Yes, I realise that various kinds of radix or bucket sorts are often
good for large data sets, either in whole or as the first steps. But
I am not clear about how it contradicts what Janis has been saying -
I feel the two of you are talking somewhat past each other.
Read the thread. We were talking with Bart and understanding each other
well enough. Then came Janis with ignorant statement that in effect was saying that plain quicksort algorithm with strcmp() as comparison
routine is optimal or close to optimal for lexicographic sorting
similar to one done by Linux or Windows sort commands applied to text
files with 20K to 1M lines of average length of few dozens characters.
The cost
of doing a comparison does not affect the complexity of an algorithm
- but it can certainly affect the best choice of algorithm for the
task in hand.
[...]
On Fri, 20 Mar 2026 08:35:05 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 04:01, Janis Papanagnou wrote:
Ah, I now see you know all that already, so I could have spared some
writing. :-)
It's still nice to see we are on the same page. We all have daft
ideas or unexpected misunderstandings at times - things we've "always
known" that are actually completely wrong. So it's nice to get
conformation that others, thinking and writing independently, reach
the same conclusions.
(I'd expect that all the regulars here will know these things about
sorting algorithms, but there's always a chance that someone learns
something from the posts.)
Pay attention that while it is not codified in C++ Standard,
implementations of std::sort are expected to be "in-place", which
practically means that extra storage should be O(logN) or at worst O(sqrt(N)). It means that merge sort is out of question. Radix/Count
sort is out of question both for this reason and because std::sort API
does not provide sufficient guarantees about the structure of key.
Heapsort is o.k in that regard.
I think that most real-world STL implementations have heapsort as a
back up for extremely rare case of primary algorithm (quicksort with median-of-3 pivot) misbehaving.
On 3/20/2026 9:53 PM, Janis Papanagnou wrote:
On 2026-03-20 22:10, DFS wrote:
Sometimes listening to you clc guys is like listening to a room full
of CS professors (which I suspect some of you are or were).
CS professors sitting in their ivory tower and without practical
experiences, and programmers without a substantial CS background;
both of these extreme characters can be problematic (if fanatic).
It was meant as a compliment. Plenty of CS professors have good
practical and industry experience, too, which you'll see on their bios
and cv's.
It's got to be tough to find good CS teachers that stick around, given
they can probably make much more money in private industry.
[...]
Actually, to be strictly topical, nobody should even be talking about
any C extensions, or C compilers, or build systems, or programs that
happen to be written in C - only Standard C, The Language. Yet there
have been plenty of discussions about all those and a lot more.
I once made a post about my own C-subset compiler:
https://groups.google.com/g/comp.lang.c/c/0lLNz9lathE/m/Lt4Jh0qqAwAJ
and it was deemed off-topic by Tim Rentsch:
"Please confine your postings in comp.lang.c to topics and subjects
relevant to the C language. None of what you say in your posting
is topical in comp.lang.c. An obvious suggestion is the newsgroup comp.compilers instead."
The challenge was issued for David Brown and for Bart.
I never expected that you will give constructive reply.
Remember that it has to be "real" bubble sort, not a simplified bubble
sort that does unnecessary work by starting each time from the
beginning. [...]
(There's Bubblesort. There's not "real" Bubblesort. Such phrases
neither explain anything nor are they helpful for discussions.)
Thank you for confirming my expectations.
On 2026-03-20 13:42, Michael S wrote:
On Fri, 20 Mar 2026 08:35:05 +0100
David Brown <david.brown@hesbynett.no> wrote:
On 20/03/2026 04:01, Janis Papanagnou wrote:
Ah, I now see you know all that already, so I could have spared
some writing. :-)
It's still nice to see we are on the same page. We all have daft
ideas or unexpected misunderstandings at times - things we've
"always known" that are actually completely wrong. So it's nice
to get conformation that others, thinking and writing
independently, reach the same conclusions.
(I'd expect that all the regulars here will know these things about
sorting algorithms, but there's always a chance that someone learns
something from the posts.)
Pay attention that while it is not codified in C++ Standard, implementations of std::sort are expected to be "in-place", which practically means that extra storage should be O(logN) or at worst O(sqrt(N)). It means that merge sort is out of question. Radix/Count
sort is out of question both for this reason and because std::sort
API does not provide sufficient guarantees about the structure of
key. Heapsort is o.k in that regard.
I think that most real-world STL implementations have heapsort as a
back up for extremely rare case of primary algorithm (quicksort with median-of-3 pivot) misbehaving.
I had found these statements:
* By default, std::sort() uses Introsort, a hybrid algorithm
combining Quick Sort, Heap Sort, and Insertion Sort.
* Its time complexity is O(Nlog(N)) in the average and worst
cases.
The Insertion Sort function had ever been inherent part of Quicksort implementations for data sub-ranges once they become of small sizes.
I'm positive, though, that your statement of "misbehaving" Quicksort
makes absolutely no sense (at least as you formulated it). How could
a call to sort() decide to use a "backup" algorithm; the very "rare"
O(N^2) corner case that you spoke about is depending on the _actual_
_data_ and cannot be characterized a priori!
What the Introsort algorithm actually does is dynamically depending
on the _recursion depth_, and to control that. To quote:
"Introsort begins with quicksort and if the recursion depth
goes more than a particular limit it switches to Heapsort".
But then you should also know that you can also natively in Quicksort
control the recursive calls to not exceed recursive calls or the stack
size by log(N).
Janis
On 2026-03-20 13:24, Michael S wrote:
The challenge was issued for David Brown and for Bart.
If you think that Usenet is for private communication you've a
fundamental misconception about that.
I never expected that you will give constructive reply.
Your perception may be severely impaired, but I don't care much.
I don't know about you, but I find requests for clarification a
sensible demand. You didn't answer to that but preferred keeping
the discussion muddy with your phrases; the context was you saying:
Remember that it has to be "real" bubble sort, not a simplified
bubble sort that does unnecessary work by starting each time from
the beginning. [...]
And I noted:
(There's Bubblesort. There's not "real" Bubblesort. Such phrases
neither explain anything nor are they helpful for discussions.)
You could have clarified that fuzzy statement instead of rambling.
Thank you for confirming my expectations.
I hope you feel good in your mental bubble.[*]
Janis
[*] No pun with Bubblesort intended.
| Sysop: | DaiTengu |
|---|---|
| Location: | Appleton, WI |
| Users: | 1,104 |
| Nodes: | 10 (1 / 9) |
| Uptime: | 492386:38:07 |
| Calls: | 14,150 |
| Calls today: | 1 |
| Files: | 186,281 |
| D/L today: |
2,214 files (841M bytes) |
| Messages: | 2,501,137 |