aio_misc.c source code [glibc/sysdeps/pthread/aio_misc.c]

/* Handle general operations.
   Copyright (C) 1997-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include <aio.h>
21	#include <assert.h>
22	#include <errno.h>
23	#include <limits.h>
24	#include <pthread.h>
25	#include <stdlib.h>
26	#include <unistd.h>
27	#include <sys/param.h>
28	#include <sys/stat.h>
29	#include <sys/time.h>
30	#include <aio_misc.h>
31
#ifndef aio_create_helper_thread
# define aio_create_helper_thread __aio_create_helper_thread

/* Default way of starting an AIO helper thread, used unless
   aio_create_helper_thread has already been defined before this point.

   Starts TF with argument ARG in a new detached thread and stores the
   thread id in *THREADP.  Returns the result of pthread_create: zero on
   success, an error number otherwise.  */
extern inline int
__aio_create_helper_thread (pthread_t *threadp, void *(*tf) (void *), void *arg)
{
  pthread_attr_t detached_attr;
  int status;

  /* The helper is never joined, so request a detached thread.  */
  pthread_attr_init (&detached_attr);
  pthread_attr_setdetachstate (&detached_attr, PTHREAD_CREATE_DETACHED);

  status = pthread_create (threadp, &detached_attr, tf, arg);

  /* The attribute object is no longer needed once the thread exists
     (or failed to start).  */
  (void) pthread_attr_destroy (&detached_attr);

  return status;
}
#endif
50
51	static void add_request_to_runlist (struct requestlist *newrequest);
52
53	/ Pool of request list entries. /
54	static struct requestlist **pool;
55
56	/ Number of total and allocated pool entries. /
57	static size_t pool_max_size;
58	static size_t pool_size;
59
60	/ We implement a two dimensional array but allocate each row separately.*
61	The macro below determines how many entries should be used per row.
62	It should better be a power of two. /*
63	#define ENTRIES_PER_ROW 32
64
65	/ How many rows we allocate at once. /
66	#define ROWS_STEP 8
67
68	/ List of available entries. /
69	static struct requestlist *freelist;
70
71	/ List of request waiting to be processed. /
72	static struct requestlist *runlist;
73
74	/ Structure list of all currently processed requests. /
75	static struct requestlist *requests;
76
77	/ Number of threads currently running. /
78	static int nthreads;
79
80	/ Number of threads waiting for work to arrive. /
81	static int idle_thread_count;
82
83
84	/ These are the values used to optimize the use of AIO. The user can*
85	overwrite them by using the `aio_init' function. /*
86	static struct aioinit optim =
87	{
88	`20`, / int aio_threads; Maximal number of threads. /
89	`64`, / int aio_num; Number of expected simultaneous requests. /
90	`0`,
91	`0`,
92	`0`,
93	`0`,
94	`1`,
95	`0`
96	};
97
98
99	/ Since the list is global we need a mutex protecting it. /
100	pthread_mutex_t __aio_requests_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
101
102	/ When you add a request to the list and there are idle threads present,*
103	you signal this condition variable. When a thread finishes work, it waits
104	on this condition variable for a time before it actually exits. /*
105	pthread_cond_t __aio_new_request_notification = PTHREAD_COND_INITIALIZER;
106
107
108	/ Functions to handle request list pool. /
109	static struct requestlist *
110	get_elem (void)
111	{
112	struct requestlist *result;
113
114	if (freelist == NULL)
115	{
116	struct requestlist *new_row;
117	int cnt;
118
119	assert (sizeof (struct aiocb) == sizeof (struct aiocb64));
120
121	if (pool_size + `1` >= pool_max_size)
122	{
123	size_t new_max_size = pool_max_size + ROWS_STEP;
124	struct requestlist **new_tab;
125
126	new_tab = (struct requestlist **)
127	realloc (pool, new_max_size * sizeof (struct requestlist *));
128
129	if (new_tab == NULL)
130	return NULL;
131
132	pool_max_size = new_max_size;
133	pool = new_tab;
134	}
135
136	/ Allocate the new row. /
137	cnt = pool_size == `0` ? optim.aio_num : ENTRIES_PER_ROW;
138	new_row = (struct requestlist *) calloc (cnt,
139	sizeof (struct requestlist));
140	if (new_row == NULL)
141	return NULL;
142
143	pool[pool_size++] = new_row;
144
145	/ Put all the new entries in the freelist. /
146	do
147	{
148	new_row->next_prio = freelist;
149	freelist = new_row++;
150	}
151	while (--cnt > `0`);
152	}
153
154	result = freelist;
155	freelist = freelist->next_prio;
156
157	return result;
158	}
159
160
161	void
162	__aio_free_request (struct requestlist *elem)
163	{
164	elem->running = no;
165	elem->next_prio = freelist;
166	freelist = elem;
167	}
168
169
170	struct requestlist *
171	__aio_find_req (aiocb_union *elem)
172	{
173	struct requestlist *runp = requests;
174	int fildes = elem->aiocb.aio_fildes;
175
176	while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
177	runp = runp->next_fd;
178
179	if (runp != NULL)
180	{
181	if (runp->aiocbp->aiocb.aio_fildes != fildes)
182	runp = NULL;
183	else
184	while (runp != NULL && runp->aiocbp != elem)
185	runp = runp->next_prio;
186	}
187
188	return runp;
189	}
190
191
192	struct requestlist *
193	__aio_find_req_fd (int fildes)
194	{
195	struct requestlist *runp = requests;
196
197	while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
198	runp = runp->next_fd;
199
200	return (runp != NULL && runp->aiocbp->aiocb.aio_fildes == fildes
201	? runp : NULL);
202	}
203
204
205	void
206	__aio_remove_request (struct requestlist last, struct* requestlist *req,
207	int all)
208	{
209	assert (req->running == yes \|\| req->running == queued
210	\|\| req->running == done);
211
212	if (last != NULL)
213	last->next_prio = all ? NULL : req->next_prio;
214	else
215	{
216	if (all \|\| req->next_prio == NULL)
217	{
218	if (req->last_fd != NULL)
219	req->last_fd->next_fd = req->next_fd;
220	else
221	requests = req->next_fd;
222	if (req->next_fd != NULL)
223	req->next_fd->last_fd = req->last_fd;
224	}
225	else
226	{
227	if (req->last_fd != NULL)
228	req->last_fd->next_fd = req->next_prio;
229	else
230	requests = req->next_prio;
231
232	if (req->next_fd != NULL)
233	req->next_fd->last_fd = req->next_prio;
234
235	req->next_prio->last_fd = req->last_fd;
236	req->next_prio->next_fd = req->next_fd;
237
238	/ Mark this entry as runnable. /
239	req->next_prio->running = yes;
240	}
241
242	if (req->running == yes)
243	{
244	struct requestlist *runp = runlist;
245
246	last = NULL;
247	while (runp != NULL)
248	{
249	if (runp == req)
250	{
251	if (last == NULL)
252	runlist = runp->next_run;
253	else
254	last->next_run = runp->next_run;
255	break;
256	}
257	last = runp;
258	runp = runp->next_run;
259	}
260	}
261	}
262	}
263
264
/* The thread handler; it is defined further down in this file.  */
static void *handle_fildes_io (void *arg);
267
268
269	/ User optimization. /
270	void
271	__aio_init (const struct aioinit *init)
272	{
273	/ Get the mutex. /
274	pthread_mutex_lock (&__aio_requests_mutex);
275
276	/ Only allow writing new values if the table is not yet allocated. /
277	if (pool == NULL)
278	{
279	optim.aio_threads = init->aio_threads < `1` ? `1` : init->aio_threads;
280	assert (powerof2 (ENTRIES_PER_ROW));
281	optim.aio_num = (init->aio_num < ENTRIES_PER_ROW
282	? ENTRIES_PER_ROW
283	: init->aio_num & ~(ENTRIES_PER_ROW - `1`));
284	}
285
286	if (init->aio_idle_time != `0`)
287	optim.aio_idle_time = init->aio_idle_time;
288
289	/ Release the mutex. /
290	pthread_mutex_unlock (&__aio_requests_mutex);
291	}
292	weak_alias (__aio_init, aio_init)
293
294
295	/ The main function of the async I/O handling. It enqueues requests*
296	and if necessary starts and handles threads. /*
297	struct requestlist *
298	__aio_enqueue_request (aiocb_union aiocbp, int* operation)
299	{
300	int result = `0`;
301	int policy, prio;
302	struct sched_param param;
303	struct requestlist last, runp, *newp;
304	int running = no;
305
306	if (operation == LIO_SYNC \|\| operation == LIO_DSYNC)
307	aiocbp->aiocb.aio_reqprio = `0`;
308	else if (aiocbp->aiocb.aio_reqprio < `0`
309	#ifdef AIO_PRIO_DELTA_MAX
310	\|\| aiocbp->aiocb.aio_reqprio > AIO_PRIO_DELTA_MAX
311	#endif
312	)
313	{
314	/ Invalid priority value. /
315	__set_errno (EINVAL);
316	aiocbp->aiocb.__error_code = EINVAL;
317	aiocbp->aiocb.__return_value = -`1`;
318	return NULL;
319	}
320
321	/ Compute priority for this request. /
322	pthread_getschedparam (pthread_self (), &policy, &param);
323	prio = param.sched_priority - aiocbp->aiocb.aio_reqprio;
324
325	/ Get the mutex. /
326	pthread_mutex_lock (&__aio_requests_mutex);
327
328	last = NULL;
329	runp = requests;
330	/ First look whether the current file descriptor is currently*
331	worked with. /*
332	while (runp != NULL
333	&& runp->aiocbp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes)
334	{
335	last = runp;
336	runp = runp->next_fd;
337	}
338
339	/ Get a new element for the waiting list. /
340	newp = get_elem ();
341	if (newp == NULL)
342	{
343	pthread_mutex_unlock (&__aio_requests_mutex);
344	__set_errno (EAGAIN);
345	return NULL;
346	}
347	newp->aiocbp = aiocbp;
348	newp->waiting = NULL;
349
350	aiocbp->aiocb.__abs_prio = prio;
351	aiocbp->aiocb.__policy = policy;
352	aiocbp->aiocb.aio_lio_opcode = operation;
353	aiocbp->aiocb.__error_code = EINPROGRESS;
354	aiocbp->aiocb.__return_value = `0`;
355
356	if (runp != NULL
357	&& runp->aiocbp->aiocb.aio_fildes == aiocbp->aiocb.aio_fildes)
358	{
359	/ The current file descriptor is worked on. It makes no sense*
360	to start another thread since this new thread would fight
361	with the running thread for the resources. But we also cannot
362	say that the thread processing this desriptor shall immediately
363	after finishing the current job process this request if there
364	are other threads in the running queue which have a higher
365	priority. /*
366
367	/ Simply enqueue it after the running one according to the*
368	priority. /*
369	last = NULL;
370	while (runp->next_prio != NULL
371	&& runp->next_prio->aiocbp->aiocb.__abs_prio >= prio)
372	{
373	last = runp;
374	runp = runp->next_prio;
375	}
376
377	newp->next_prio = runp->next_prio;
378	runp->next_prio = newp;
379
380	running = queued;
381	}
382	else
383	{
384	running = yes;
385	/ Enqueue this request for a new descriptor. /
386	if (last == NULL)
387	{
388	newp->last_fd = NULL;
389	newp->next_fd = requests;
390	if (requests != NULL)
391	requests->last_fd = newp;
392	requests = newp;
393	}
394	else
395	{
396	newp->next_fd = last->next_fd;
397	newp->last_fd = last;
398	last->next_fd = newp;
399	if (newp->next_fd != NULL)
400	newp->next_fd->last_fd = newp;
401	}
402
403	newp->next_prio = NULL;
404	last = NULL;
405	}
406
407	if (running == yes)
408	{
409	/ We try to create a new thread for this file descriptor. The*
410	function which gets called will handle all available requests
411	for this descriptor and when all are processed it will
412	terminate.
413
414	If no new thread can be created or if the specified limit of
415	threads for AIO is reached we queue the request. /*
416
417	/ See if we need to and are able to create a thread. /
418	if (nthreads < optim.aio_threads && idle_thread_count == `0`)
419	{
420	pthread_t thid;
421
422	running = newp->running = allocated;
423
424	/ Now try to start a thread. /
425	result = aio_create_helper_thread (&thid, handle_fildes_io, newp);
426	if (result == `0`)
427	/ We managed to enqueue the request. All errors which can*
428	happen now can be recognized by calls to `aio_return' and
429	`aio_error'. /*
430	++nthreads;
431	else
432	{
433	/ Reset the running flag. The new request is not running. /
434	running = newp->running = yes;
435
436	if (nthreads == `0`)
437	{
438	/ We cannot create a thread in the moment and there is*
439	also no thread running. This is a problem. `errno' is
440	set to EAGAIN if this is only a temporary problem. /*
441	__aio_remove_request (last, newp, `0`);
442	}
443	else
444	result = `0`;
445	}
446	}
447	}
448
449	/ Enqueue the request in the run queue if it is not yet running. /
450	if (running == yes && result == `0`)
451	{
452	add_request_to_runlist (newp);
453
454	/ If there is a thread waiting for work, then let it know that we*
455	have just given it something to do. /*
456	if (idle_thread_count > `0`)
457	pthread_cond_signal (&__aio_new_request_notification);
458	}
459
460	if (result == `0`)
461	newp->running = running;
462	else
463	{
464	/ Something went wrong. /
465	__aio_free_request (newp);
466	aiocbp->aiocb.__error_code = result;
467	__set_errno (result);
468	newp = NULL;
469	}
470
471	/ Release the mutex. /
472	pthread_mutex_unlock (&__aio_requests_mutex);
473
474	return newp;
475	}
476
477
478	static void *
479	handle_fildes_io (void *arg)
480	{
481	pthread_t self = pthread_self ();
482	struct sched_param param;
483	struct requestlist runp = (struct* requestlist *) arg;
484	aiocb_union *aiocbp;
485	int policy;
486	int fildes;
487
488	pthread_getschedparam (self, &policy, &param);
489
490	do
491	{
492	/ If runp is NULL, then we were created to service the work queue*
493	in general, not to handle any particular request. In that case we
494	skip the "do work" stuff on the first pass, and go directly to the
495	"get work off the work queue" part of this loop, which is near the
496	end. /*
497	if (runp == NULL)
498	pthread_mutex_lock (&__aio_requests_mutex);
499	else
500	{
501	/ Hopefully this request is marked as running. /
502	assert (runp->running == allocated);
503
504	/ Update our variables. /
505	aiocbp = runp->aiocbp;
506	fildes = aiocbp->aiocb.aio_fildes;
507
508	/ Change the priority to the requested value (if necessary). /
509	if (aiocbp->aiocb.__abs_prio != param.sched_priority
510	\|\| aiocbp->aiocb.__policy != policy)
511	{
512	param.sched_priority = aiocbp->aiocb.__abs_prio;
513	policy = aiocbp->aiocb.__policy;
514	pthread_setschedparam (self, policy, &param);
515	}
516
517	/ Process request pointed to by RUNP. We must not be disturbed*
518	by signals. /*
519	if ((aiocbp->aiocb.aio_lio_opcode & `127`) == LIO_READ)
520	{
521	if (sizeof (off_t) != sizeof (off64_t)
522	&& aiocbp->aiocb.aio_lio_opcode & `128`)
523	aiocbp->aiocb.__return_value =
524	TEMP_FAILURE_RETRY (__pread64 (fildes, (void *)
525	aiocbp->aiocb64.aio_buf,
526	aiocbp->aiocb64.aio_nbytes,
527	aiocbp->aiocb64.aio_offset));
528	else
529	aiocbp->aiocb.__return_value =
530	TEMP_FAILURE_RETRY (__libc_pread (fildes,
531	(void *)
532	aiocbp->aiocb.aio_buf,
533	aiocbp->aiocb.aio_nbytes,
534	aiocbp->aiocb.aio_offset));
535
536	if (aiocbp->aiocb.__return_value == -`1` && errno == ESPIPE)
537	/ The Linux kernel is different from others. It returns*
538	ESPIPE if using pread on a socket. Other platforms
539	simply ignore the offset parameter and behave like
540	read. /*
541	aiocbp->aiocb.__return_value =
542	TEMP_FAILURE_RETRY (read (fildes,
543	(void *) aiocbp->aiocb64.aio_buf,
544	aiocbp->aiocb64.aio_nbytes));
545	}
546	else if ((aiocbp->aiocb.aio_lio_opcode & `127`) == LIO_WRITE)
547	{
548	if (sizeof (off_t) != sizeof (off64_t)
549	&& aiocbp->aiocb.aio_lio_opcode & `128`)
550	aiocbp->aiocb.__return_value =
551	TEMP_FAILURE_RETRY (__pwrite64 (fildes, (const void *)
552	aiocbp->aiocb64.aio_buf,
553	aiocbp->aiocb64.aio_nbytes,
554	aiocbp->aiocb64.aio_offset));
555	else
556	aiocbp->aiocb.__return_value =
557	TEMP_FAILURE_RETRY (__libc_pwrite (fildes, (const void *)
558	aiocbp->aiocb.aio_buf,
559	aiocbp->aiocb.aio_nbytes,
560	aiocbp->aiocb.aio_offset));
561
562	if (aiocbp->aiocb.__return_value == -`1` && errno == ESPIPE)
563	/ The Linux kernel is different from others. It returns*
564	ESPIPE if using pwrite on a socket. Other platforms
565	simply ignore the offset parameter and behave like
566	write. /*
567	aiocbp->aiocb.__return_value =
568	TEMP_FAILURE_RETRY (write (fildes,
569	(void *) aiocbp->aiocb64.aio_buf,
570	aiocbp->aiocb64.aio_nbytes));
571	}
572	else if (aiocbp->aiocb.aio_lio_opcode == LIO_DSYNC)
573	aiocbp->aiocb.__return_value =
574	TEMP_FAILURE_RETRY (fdatasync (fildes));
575	else if (aiocbp->aiocb.aio_lio_opcode == LIO_SYNC)
576	aiocbp->aiocb.__return_value =
577	TEMP_FAILURE_RETRY (fsync (fildes));
578	else
579	{
580	/ This is an invalid opcode. /
581	aiocbp->aiocb.__return_value = -`1`;
582	__set_errno (EINVAL);
583	}
584
585	/ Get the mutex. /
586	pthread_mutex_lock (&__aio_requests_mutex);
587
588	if (aiocbp->aiocb.__return_value == -`1`)
589	aiocbp->aiocb.__error_code = errno;
590	else
591	aiocbp->aiocb.__error_code = `0`;
592
593	/ Send the signal to notify about finished processing of the*
594	request. /*
595	__aio_notify (runp);
596
597	/ For debugging purposes we reset the running flag of the*
598	finished request. /*
599	assert (runp->running == allocated);
600	runp->running = done;
601
602	/ Now dequeue the current request. /
603	__aio_remove_request (NULL, runp, `0`);
604	if (runp->next_prio != NULL)
605	add_request_to_runlist (runp->next_prio);
606
607	/ Free the old element. /
608	__aio_free_request (runp);
609	}
610
611	runp = runlist;
612
613	/ If the runlist is empty, then we sleep for a while, waiting for*
614	something to arrive in it. /*
615	if (runp == NULL && optim.aio_idle_time >= `0`)
616	{
617	struct timespec now;
618	struct timespec wakeup_time;
619
620	++idle_thread_count;
621	__clock_gettime (CLOCK_REALTIME, &now);
622	wakeup_time.tv_sec = now.tv_sec + optim.aio_idle_time;
623	wakeup_time.tv_nsec = now.tv_nsec;
624	if (wakeup_time.tv_nsec >= `1000000000`)
625	{
626	wakeup_time.tv_nsec -= `1000000000`;
627	++wakeup_time.tv_sec;
628	}
629	pthread_cond_timedwait (&__aio_new_request_notification,
630	&__aio_requests_mutex,
631	&wakeup_time);
632	--idle_thread_count;
633	runp = runlist;
634	}
635
636	if (runp == NULL)
637	--nthreads;
638	else
639	{
640	assert (runp->running == yes);
641	runp->running = allocated;
642	runlist = runp->next_run;
643
644	/ If we have a request to process, and there's still another in*
645	the run list, then we need to either wake up or create a new
646	thread to service the request that is still in the run list. /*
647	if (runlist != NULL)
648	{
649	/ There are at least two items in the work queue to work on.*
650	If there are other idle threads, then we should wake them
651	up for these other work elements; otherwise, we should try
652	to create a new thread. /*
653	if (idle_thread_count > `0`)
654	pthread_cond_signal (&__aio_new_request_notification);
655	else if (nthreads < optim.aio_threads)
656	{
657	pthread_t thid;
658	pthread_attr_t attr;
659
660	/ Make sure the thread is created detached. /
661	pthread_attr_init (&attr);
662	pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
663
664	/ Now try to start a thread. If we fail, no big deal,*
665	because we know that there is at least one thread (us)
666	that is working on AIO operations. /*
667	if (pthread_create (&thid, &attr, handle_fildes_io, NULL)
668	== `0`)
669	++nthreads;
670	}
671	}
672	}
673
674	/ Release the mutex. /
675	pthread_mutex_unlock (&__aio_requests_mutex);
676	}
677	while (runp != NULL);
678
679	return NULL;
680	}
681
682
683	/ Free allocated resources. /
684	libc_freeres_fn (free_res)
685	{
686	size_t row;
687
688	for (row = `0`; row < pool_max_size; ++row)
689	free (pool[row]);
690
691	free (pool);
692	}
693
694
695	/ Add newrequest to the runlist. The __abs_prio flag of newrequest must*
696	be correctly set to do this. Also, you had better set newrequest's
697	"running" flag to "yes" before you release your lock or you'll throw an
698	assertion. /*
699	static void
700	add_request_to_runlist (struct requestlist *newrequest)
701	{
702	int prio = newrequest->aiocbp->aiocb.__abs_prio;
703	struct requestlist *runp;
704
705	if (runlist == NULL \|\| runlist->aiocbp->aiocb.__abs_prio < prio)
706	{
707	newrequest->next_run = runlist;
708	runlist = newrequest;
709	}
710	else
711	{
712	runp = runlist;
713
714	while (runp->next_run != NULL
715	&& runp->next_run->aiocbp->aiocb.__abs_prio >= prio)
716	runp = runp->next_run;
717
718	newrequest->next_run = runp->next_run;
719	runp->next_run = newrequest;
720	}
721	}
722

Browse the source code of glibc/sysdeps/pthread/aio_misc.c