You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

uavc.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /* -------------------------------------------------------------------------
  2. *
  3. * contrib/sepgsql/uavc.c
  4. *
  5. * Implementation of userspace access vector cache; that enables to cache
  6. * access control decisions recently used, and reduce number of kernel
  7. * invocations to avoid unnecessary performance hit.
  8. *
  9. * Copyright (c) 2011-2019, PostgreSQL Global Development Group
  10. *
  11. * -------------------------------------------------------------------------
  12. */
  13. #include "postgres.h"
  14. #include "access/hash.h"
  15. #include "catalog/pg_proc.h"
  16. #include "commands/seclabel.h"
  17. #include "storage/ipc.h"
  18. #include "utils/guc.h"
  19. #include "utils/memutils.h"
  20. #include "sepgsql.h"
  21. /*
  22. * avc_cache
  23. *
  24. * It enables to cache access control decision (and behavior on execution of
  25. * trusted procedure, db_procedure class only) for a particular pair of
  26. * security labels and object class in userspace.
  27. */
  28. typedef struct
  29. {
  30. uint32 hash; /* hash value of this cache entry */
  31. char *scontext; /* security context of the subject */
  32. char *tcontext; /* security context of the target */
  33. uint16 tclass; /* object class of the target */
  34. uint32 allowed; /* permissions to be allowed */
  35. uint32 auditallow; /* permissions to be audited on allowed */
  36. uint32 auditdeny; /* permissions to be audited on denied */
  37. bool permissive; /* true, if permissive rule */
  38. bool hot_cache; /* true, if recently referenced */
  39. bool tcontext_is_valid;
  40. /* true, if tcontext is valid */
  41. char *ncontext; /* temporary scontext on execution of trusted
  42. * procedure, or NULL elsewhere */
  43. } avc_cache;
  44. /*
  45. * Declaration of static variables
  46. */
  47. #define AVC_NUM_SLOTS 512
  48. #define AVC_NUM_RECLAIM 16
  49. #define AVC_DEF_THRESHOLD 384
  50. static MemoryContext avc_mem_cxt;
  51. static List *avc_slots[AVC_NUM_SLOTS]; /* avc's hash buckets */
  52. static int avc_num_caches; /* number of caches currently used */
  53. static int avc_lru_hint; /* index of the buckets to be reclaimed next */
  54. static int avc_threshold; /* threshold to launch cache-reclaiming */
  55. static char *avc_unlabeled; /* system 'unlabeled' label */
  56. /*
  57. * Hash function
  58. */
  59. static uint32
  60. sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
  61. {
  62. return hash_any((const unsigned char *) scontext, strlen(scontext))
  63. ^ hash_any((const unsigned char *) tcontext, strlen(tcontext))
  64. ^ tclass;
  65. }
  66. /*
  67. * Reset all the avc caches
  68. */
  69. static void
  70. sepgsql_avc_reset(void)
  71. {
  72. MemoryContextReset(avc_mem_cxt);
  73. memset(avc_slots, 0, sizeof(List *) * AVC_NUM_SLOTS);
  74. avc_num_caches = 0;
  75. avc_lru_hint = 0;
  76. avc_unlabeled = NULL;
  77. }
  78. /*
  79. * Reclaim caches recently unreferenced
  80. */
  81. static void
  82. sepgsql_avc_reclaim(void)
  83. {
  84. ListCell *cell;
  85. ListCell *next;
  86. ListCell *prev;
  87. int index;
  88. while (avc_num_caches >= avc_threshold - AVC_NUM_RECLAIM)
  89. {
  90. index = avc_lru_hint;
  91. prev = NULL;
  92. for (cell = list_head(avc_slots[index]); cell; cell = next)
  93. {
  94. avc_cache *cache = lfirst(cell);
  95. next = lnext(cell);
  96. if (!cache->hot_cache)
  97. {
  98. avc_slots[index]
  99. = list_delete_cell(avc_slots[index], cell, prev);
  100. pfree(cache->scontext);
  101. pfree(cache->tcontext);
  102. if (cache->ncontext)
  103. pfree(cache->ncontext);
  104. pfree(cache);
  105. avc_num_caches--;
  106. }
  107. else
  108. {
  109. cache->hot_cache = false;
  110. prev = cell;
  111. }
  112. }
  113. avc_lru_hint = (avc_lru_hint + 1) % AVC_NUM_SLOTS;
  114. }
  115. }
  116. /* -------------------------------------------------------------------------
  117. *
  118. * sepgsql_avc_check_valid
  119. *
  120. * This function checks whether the cached entries are still valid. If
  121. * the security policy has been reloaded (or any other events that requires
  122. * resetting userspace caches has occurred) since the last reference to
  123. * the access vector cache, we must flush the cache.
  124. *
  125. * Access control decisions must be atomic, but multiple system calls may
  126. * be required to make a decision; thus, when referencing the access vector
  127. * cache, we must loop until we complete without an intervening cache flush
  128. * event. In practice, looping even once should be very rare. Callers should
  129. * do something like this:
  130. *
  131. * sepgsql_avc_check_valid();
  132. * do {
  133. * :
  134. * <reference to uavc>
  135. * :
  136. * } while (!sepgsql_avc_check_valid())
  137. *
  138. * -------------------------------------------------------------------------
  139. */
  140. static bool
  141. sepgsql_avc_check_valid(void)
  142. {
  143. if (selinux_status_updated() > 0)
  144. {
  145. sepgsql_avc_reset();
  146. return false;
  147. }
  148. return true;
  149. }
  150. /*
  151. * sepgsql_avc_unlabeled
  152. *
  153. * Returns an alternative label to be applied when no label or an invalid
  154. * label would otherwise be assigned.
  155. */
  156. static char *
  157. sepgsql_avc_unlabeled(void)
  158. {
  159. if (!avc_unlabeled)
  160. {
  161. security_context_t unlabeled;
  162. if (security_get_initial_context_raw("unlabeled", &unlabeled) < 0)
  163. ereport(ERROR,
  164. (errcode(ERRCODE_INTERNAL_ERROR),
  165. errmsg("SELinux: failed to get initial security label: %m")));
  166. PG_TRY();
  167. {
  168. avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, unlabeled);
  169. }
  170. PG_CATCH();
  171. {
  172. freecon(unlabeled);
  173. PG_RE_THROW();
  174. }
  175. PG_END_TRY();
  176. freecon(unlabeled);
  177. }
  178. return avc_unlabeled;
  179. }
  180. /*
  181. * sepgsql_avc_compute
  182. *
  183. * A fallback path, when cache mishit. It asks SELinux its access control
  184. * decision for the supplied pair of security context and object class.
  185. */
  186. static avc_cache *
  187. sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
  188. {
  189. char *ucontext = NULL;
  190. char *ncontext = NULL;
  191. MemoryContext oldctx;
  192. avc_cache *cache;
  193. uint32 hash;
  194. int index;
  195. struct av_decision avd;
  196. hash = sepgsql_avc_hash(scontext, tcontext, tclass);
  197. index = hash % AVC_NUM_SLOTS;
  198. /*
  199. * Validation check of the supplied security context. Because it always
  200. * invoke system-call, frequent check should be avoided. Unless security
  201. * policy is reloaded, validation status shall be kept, so we also cache
  202. * whether the supplied security context was valid, or not.
  203. */
  204. if (security_check_context_raw((security_context_t) tcontext) != 0)
  205. ucontext = sepgsql_avc_unlabeled();
  206. /*
  207. * Ask SELinux its access control decision
  208. */
  209. if (!ucontext)
  210. sepgsql_compute_avd(scontext, tcontext, tclass, &avd);
  211. else
  212. sepgsql_compute_avd(scontext, ucontext, tclass, &avd);
  213. /*
  214. * It also caches a security label to be switched when a client labeled as
  215. * 'scontext' executes a procedure labeled as 'tcontext', not only access
  216. * control decision on the procedure. The security label to be switched
  217. * shall be computed uniquely on a pair of 'scontext' and 'tcontext',
  218. * thus, it is reasonable to cache the new label on avc, and enables to
  219. * reduce unnecessary system calls. It shall be referenced at
  220. * sepgsql_needs_fmgr_hook to check whether the supplied function is a
  221. * trusted procedure, or not.
  222. */
  223. if (tclass == SEPG_CLASS_DB_PROCEDURE)
  224. {
  225. if (!ucontext)
  226. ncontext = sepgsql_compute_create(scontext, tcontext,
  227. SEPG_CLASS_PROCESS, NULL);
  228. else
  229. ncontext = sepgsql_compute_create(scontext, ucontext,
  230. SEPG_CLASS_PROCESS, NULL);
  231. if (strcmp(scontext, ncontext) == 0)
  232. {
  233. pfree(ncontext);
  234. ncontext = NULL;
  235. }
  236. }
  237. /*
  238. * Set up an avc_cache object
  239. */
  240. oldctx = MemoryContextSwitchTo(avc_mem_cxt);
  241. cache = palloc0(sizeof(avc_cache));
  242. cache->hash = hash;
  243. cache->scontext = pstrdup(scontext);
  244. cache->tcontext = pstrdup(tcontext);
  245. cache->tclass = tclass;
  246. cache->allowed = avd.allowed;
  247. cache->auditallow = avd.auditallow;
  248. cache->auditdeny = avd.auditdeny;
  249. cache->hot_cache = true;
  250. if (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE)
  251. cache->permissive = true;
  252. if (!ucontext)
  253. cache->tcontext_is_valid = true;
  254. if (ncontext)
  255. cache->ncontext = pstrdup(ncontext);
  256. avc_num_caches++;
  257. if (avc_num_caches > avc_threshold)
  258. sepgsql_avc_reclaim();
  259. avc_slots[index] = lcons(cache, avc_slots[index]);
  260. MemoryContextSwitchTo(oldctx);
  261. return cache;
  262. }
  263. /*
  264. * sepgsql_avc_lookup
  265. *
  266. * Look up a cache entry that matches the supplied security contexts and
  267. * object class. If not found, create a new cache entry.
  268. */
  269. static avc_cache *
  270. sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
  271. {
  272. avc_cache *cache;
  273. ListCell *cell;
  274. uint32 hash;
  275. int index;
  276. hash = sepgsql_avc_hash(scontext, tcontext, tclass);
  277. index = hash % AVC_NUM_SLOTS;
  278. foreach(cell, avc_slots[index])
  279. {
  280. cache = lfirst(cell);
  281. if (cache->hash == hash &&
  282. cache->tclass == tclass &&
  283. strcmp(cache->tcontext, tcontext) == 0 &&
  284. strcmp(cache->scontext, scontext) == 0)
  285. {
  286. cache->hot_cache = true;
  287. return cache;
  288. }
  289. }
  290. /* not found, so insert a new cache */
  291. return sepgsql_avc_compute(scontext, tcontext, tclass);
  292. }
  293. /*
  294. * sepgsql_avc_check_perms(_label)
  295. *
  296. * It returns 'true', if the security policy suggested to allow the required
  297. * permissions. Otherwise, it returns 'false' or raises an error according
  298. * to the 'abort_on_violation' argument.
  299. * The 'tobject' and 'tclass' identify the target object being referenced,
  300. * and 'required' is a bitmask of permissions (SEPG_*__*) defined for each
  301. * object classes.
  302. * The 'audit_name' is the object name (optional). If SEPGSQL_AVC_NOAUDIT
  303. * was supplied, it means to skip all the audit messages.
  304. */
  305. bool
  306. sepgsql_avc_check_perms_label(const char *tcontext,
  307. uint16 tclass, uint32 required,
  308. const char *audit_name,
  309. bool abort_on_violation)
  310. {
  311. char *scontext = sepgsql_get_client_label();
  312. avc_cache *cache;
  313. uint32 denied;
  314. uint32 audited;
  315. bool result;
  316. sepgsql_avc_check_valid();
  317. do
  318. {
  319. result = true;
  320. /*
  321. * If the target object is unlabeled, we perform the check using the
  322. * label supplied by sepgsql_avc_unlabeled().
  323. */
  324. if (tcontext)
  325. cache = sepgsql_avc_lookup(scontext, tcontext, tclass);
  326. else
  327. cache = sepgsql_avc_lookup(scontext,
  328. sepgsql_avc_unlabeled(), tclass);
  329. denied = required & ~cache->allowed;
  330. /*
  331. * Compute permissions to be audited
  332. */
  333. if (sepgsql_get_debug_audit())
  334. audited = (denied ? (denied & ~0) : (required & ~0));
  335. else
  336. audited = denied ? (denied & cache->auditdeny)
  337. : (required & cache->auditallow);
  338. if (denied)
  339. {
  340. /*
  341. * In permissive mode or permissive domain, violated permissions
  342. * shall be audited to the log files at once, and then implicitly
  343. * allowed to avoid a flood of access denied logs, because the
  344. * purpose of permissive mode/domain is to collect a violation log
  345. * that will make it possible to fix up the security policy.
  346. */
  347. if (!sepgsql_getenforce() || cache->permissive)
  348. cache->allowed |= required;
  349. else
  350. result = false;
  351. }
  352. } while (!sepgsql_avc_check_valid());
  353. /*
  354. * In the case when we have something auditable actions here,
  355. * sepgsql_audit_log shall be called with text representation of security
  356. * labels for both of subject and object. It records this access
  357. * violation, so DBA will be able to find out unexpected security problems
  358. * later.
  359. */
  360. if (audited != 0 &&
  361. audit_name != SEPGSQL_AVC_NOAUDIT &&
  362. sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
  363. {
  364. sepgsql_audit_log(denied != 0,
  365. cache->scontext,
  366. cache->tcontext_is_valid ?
  367. cache->tcontext : sepgsql_avc_unlabeled(),
  368. cache->tclass,
  369. audited,
  370. audit_name);
  371. }
  372. if (abort_on_violation && !result)
  373. ereport(ERROR,
  374. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  375. errmsg("SELinux: security policy violation")));
  376. return result;
  377. }
  378. bool
  379. sepgsql_avc_check_perms(const ObjectAddress *tobject,
  380. uint16 tclass, uint32 required,
  381. const char *audit_name,
  382. bool abort_on_violation)
  383. {
  384. char *tcontext = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
  385. bool rc;
  386. rc = sepgsql_avc_check_perms_label(tcontext,
  387. tclass, required,
  388. audit_name, abort_on_violation);
  389. if (tcontext)
  390. pfree(tcontext);
  391. return rc;
  392. }
  393. /*
  394. * sepgsql_avc_trusted_proc
  395. *
  396. * If the supplied function OID is configured as a trusted procedure, this
  397. * function will return a security label to be used during the execution of
  398. * that function. Otherwise, it returns NULL.
  399. */
  400. char *
  401. sepgsql_avc_trusted_proc(Oid functionId)
  402. {
  403. char *scontext = sepgsql_get_client_label();
  404. char *tcontext;
  405. ObjectAddress tobject;
  406. avc_cache *cache;
  407. tobject.classId = ProcedureRelationId;
  408. tobject.objectId = functionId;
  409. tobject.objectSubId = 0;
  410. tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);
  411. sepgsql_avc_check_valid();
  412. do
  413. {
  414. if (tcontext)
  415. cache = sepgsql_avc_lookup(scontext, tcontext,
  416. SEPG_CLASS_DB_PROCEDURE);
  417. else
  418. cache = sepgsql_avc_lookup(scontext, sepgsql_avc_unlabeled(),
  419. SEPG_CLASS_DB_PROCEDURE);
  420. } while (!sepgsql_avc_check_valid());
  421. return cache->ncontext;
  422. }
  423. /*
  424. * sepgsql_avc_exit
  425. *
  426. * Clean up userspace AVC on process exit.
  427. */
  428. static void
  429. sepgsql_avc_exit(int code, Datum arg)
  430. {
  431. selinux_status_close();
  432. }
  433. /*
  434. * sepgsql_avc_init
  435. *
  436. * Initialize the userspace AVC. This should be called from _PG_init.
  437. */
  438. void
  439. sepgsql_avc_init(void)
  440. {
  441. int rc;
  442. /*
  443. * All the avc stuff shall be allocated in avc_mem_cxt
  444. */
  445. avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
  446. "userspace access vector cache",
  447. ALLOCSET_DEFAULT_SIZES);
  448. memset(avc_slots, 0, sizeof(avc_slots));
  449. avc_num_caches = 0;
  450. avc_lru_hint = 0;
  451. avc_threshold = AVC_DEF_THRESHOLD;
  452. /*
  453. * SELinux allows to mmap(2) its kernel status page in read-only mode to
  454. * inform userspace applications its status updating (such as policy
  455. * reloading) without system-call invocations. This feature is only
  456. * supported in Linux-2.6.38 or later, however, libselinux provides a
  457. * fallback mode to know its status using netlink sockets.
  458. */
  459. rc = selinux_status_open(1);
  460. if (rc < 0)
  461. ereport(ERROR,
  462. (errcode(ERRCODE_INTERNAL_ERROR),
  463. errmsg("SELinux: could not open selinux status : %m")));
  464. else if (rc > 0)
  465. ereport(LOG,
  466. (errmsg("SELinux: kernel status page uses fallback mode")));
  467. /* Arrange to close selinux status page on process exit. */
  468. on_proc_exit(sepgsql_avc_exit, 0);
  469. }